Searched for refs:vacc0x12 (results 1–24 of 24), sorted by relevance.
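
All 24 hits trace a single accumulator, vacc0x12, through the same three NEON steps: the channel bias is loaded into lane 0 of a zeroed int32x4 (vld1q_lane_s32), widened int8 products are folded into it (vpadalq_s16), and the lanes are summed at the end (vpaddq_s32 on AArch64, vadd_s32 on the low/high halves on ARMv7). Below is a minimal sketch of that per-channel step, assuming AArch64 and arm_neon.h; dot_s8x8 and its signature are illustrative, not XNNPACK API.

#include <arm_neon.h>
#include <stdint.h>

/* Dot product of eight signed 8-bit values plus a bias: the work one
 * vacc0x12-style accumulator does for a single output channel in the
 * c8 kernels listed here. Hypothetical helper, not from XNNPACK. */
static int32_t dot_s8x8(const int8_t a[8], const int8_t b[8], int32_t bias) {
  /* Bias into lane 0 of a zeroed accumulator, matching
   * vld1q_lane_s32(w, vmovq_n_s32(0), 0) at each hit's init line. */
  int32x4_t vacc = vld1q_lane_s32(&bias, vmovq_n_s32(0), 0);
  /* Widening multiply: eight int8*int8 products, held as int16. */
  const int16x8_t vprod = vmull_s8(vld1_s8(a), vld1_s8(b));
  /* Pairwise add-and-accumulate-long folds the int16 pairs into the
   * four int32 lanes: the vpadalq_s16 lines in every kernel below. */
  vacc = vpadalq_s16(vacc, vprod);
  /* Horizontal sum of the four lanes (AArch64-only intrinsic). */
  return vaddvq_s32(vacc);
}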

/external/XNNPACK/src/qs8-gemm/gen/
1x16c8-minmax-neon-mull-padal.c
56 …int32x4_t vacc0x12 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal() local
105 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
126 const int32x4_t vsum0xCD = vpaddq_s32(vacc0x12, vacc0x13); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
154 const int32x2_t vpsum0xC = vadd_s32(vget_low_s32(vacc0x12), vget_high_s32(vacc0x12)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
1x16c8-minmax-neon-mlal-padal.c
56 …int32x4_t vacc0x12 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
135 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
194 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
215 const int32x4_t vsum0xCD = vpaddq_s32(vacc0x12, vacc0x13); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
243 const int32x2_t vpsum0xC = vadd_s32(vget_low_s32(vacc0x12), vget_high_s32(vacc0x12)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
1x16c16-minmax-neon-mlal-padal.c
56 …int32x4_t vacc0x12 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() local
121 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
142 const int32x4_t vsum0xCD = vpaddq_s32(vacc0x12, vacc0x13); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
170 const int32x2_t vpsum0xC = vadd_s32(vget_low_s32(vacc0x12), vget_high_s32(vacc0x12)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
2x16c8-minmax-neon-mull-padal.c
62 …int32x4_t vacc0x12 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local
78 int32x4_t vacc1x12 = vacc0x12; in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
153 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
181 const int32x4_t vsum0xCD = vpaddq_s32(vacc0x12, vacc0x13); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
221 const int32x2_t vpsum0xC = vadd_s32(vget_low_s32(vacc0x12), vget_high_s32(vacc0x12)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
2x16c8-minmax-neon-mlal-padal.c
62 …int32x4_t vacc0x12 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
78 int32x4_t vacc1x12 = vacc0x12; in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
197 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
292 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
320 const int32x4_t vsum0xCD = vpaddq_s32(vacc0x12, vacc0x13); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
360 const int32x2_t vpsum0xC = vadd_s32(vget_low_s32(vacc0x12), vget_high_s32(vacc0x12)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
2x16c16-minmax-neon-mlal-padal.c
62 …int32x4_t vacc0x12 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
78 int32x4_t vacc1x12 = vacc0x12; in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
182 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
213 const int32x4_t vsum0xCD = vpaddq_s32(vacc0x12, vacc0x13); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
253 const int32x2_t vpsum0xC = vadd_s32(vget_low_s32(vacc0x12), vget_high_s32(vacc0x12)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
3x16c8-minmax-neon-mull-padal.c
68 …int32x4_t vacc0x12 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local
84 int32x4_t vacc1x12 = vacc0x12; in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
100 int32x4_t vacc2x12 = vacc0x12; in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
201 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
236 const int32x4_t vsum0xCD = vpaddq_s32(vacc0x12, vacc0x13); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
288 const int32x2_t vpsum0xC = vadd_s32(vget_low_s32(vacc0x12), vget_high_s32(vacc0x12)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
3x16c8-minmax-neon-mlal-padal.c
68 …int32x4_t vacc0x12 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
84 int32x4_t vacc1x12 = vacc0x12; in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
100 int32x4_t vacc2x12 = vacc0x12; in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
259 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
390 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
425 const int32x4_t vsum0xCD = vpaddq_s32(vacc0x12, vacc0x13); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
477 const int32x2_t vpsum0xC = vadd_s32(vget_low_s32(vacc0x12), vget_high_s32(vacc0x12)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
4x16c8-minmax-neon-mull-padal.c
74 …int32x4_t vacc0x12 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local
90 int32x4_t vacc1x12 = vacc0x12; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
106 int32x4_t vacc2x12 = vacc0x12; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
122 int32x4_t vacc3x12 = vacc0x12; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
249 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
291 const int32x4_t vsum0xCD = vpaddq_s32(vacc0x12, vacc0x13); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
355 const int32x2_t vpsum0xC = vadd_s32(vget_low_s32(vacc0x12), vget_high_s32(vacc0x12)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
3x16c16-minmax-neon-mlal-padal.c
68 …int32x4_t vacc0x12 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
84 int32x4_t vacc1x12 = vacc0x12; in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
100 int32x4_t vacc2x12 = vacc0x12; in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
243 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
284 const int32x4_t vsum0xCD = vpaddq_s32(vacc0x12, vacc0x13); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
336 const int32x2_t vpsum0xC = vadd_s32(vget_low_s32(vacc0x12), vget_high_s32(vacc0x12)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
4x16c16-minmax-neon-mlal-padal.c
74 …int32x4_t vacc0x12 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
90 int32x4_t vacc1x12 = vacc0x12; in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
106 int32x4_t vacc2x12 = vacc0x12; in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
122 int32x4_t vacc3x12 = vacc0x12; in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
304 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
355 const int32x4_t vsum0xCD = vpaddq_s32(vacc0x12, vacc0x13); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
419 const int32x2_t vpsum0xC = vadd_s32(vget_low_s32(vacc0x12), vget_high_s32(vacc0x12)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
4x16c8-minmax-neon-mlal-padal.c
74 …int32x4_t vacc0x12 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
90 int32x4_t vacc1x12 = vacc0x12; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
106 int32x4_t vacc2x12 = vacc0x12; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
122 int32x4_t vacc3x12 = vacc0x12; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
321 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
488 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
530 const int32x4_t vsum0xCD = vpaddq_s32(vacc0x12, vacc0x13); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
594 const int32x2_t vpsum0xC = vadd_s32(vget_low_s32(vacc0x12), vget_high_s32(vacc0x12)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
/external/XNNPACK/src/qs8-igemm/gen/
1x16c8-minmax-neon-mull-padal.c
59 …int32x4_t vacc0x12 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal() local
116 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal()
140 const int32x4_t vsum0xCD = vpaddq_s32(vacc0x12, vacc0x13); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal()
168 const int32x2_t vpsum0xC = vadd_s32(vget_low_s32(vacc0x12), vget_high_s32(vacc0x12)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal()
1x16c8-minmax-neon-mlal-padal.c
59 …int32x4_t vacc0x12 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
146 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
205 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
229 const int32x4_t vsum0xCD = vpaddq_s32(vacc0x12, vacc0x13); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
257 const int32x2_t vpsum0xC = vadd_s32(vget_low_s32(vacc0x12), vget_high_s32(vacc0x12)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
1x16c16-minmax-neon-mlal-padal.c
59 …int32x4_t vacc0x12 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() local
132 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
156 const int32x4_t vsum0xCD = vpaddq_s32(vacc0x12, vacc0x13); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
184 const int32x2_t vpsum0xC = vadd_s32(vget_low_s32(vacc0x12), vget_high_s32(vacc0x12)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
2x16c8-minmax-neon-mull-padal.c
63 …int32x4_t vacc0x12 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local
79 int32x4_t vacc1x12 = vacc0x12; in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
166 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
197 const int32x4_t vsum0xCD = vpaddq_s32(vacc0x12, vacc0x13); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
237 const int32x2_t vpsum0xC = vadd_s32(vget_low_s32(vacc0x12), vget_high_s32(vacc0x12)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
2x16c8-minmax-neon-mlal-padal.c
63 …int32x4_t vacc0x12 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
79 int32x4_t vacc1x12 = vacc0x12; in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
210 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
305 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
336 const int32x4_t vsum0xCD = vpaddq_s32(vacc0x12, vacc0x13); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
376 const int32x2_t vpsum0xC = vadd_s32(vget_low_s32(vacc0x12), vget_high_s32(vacc0x12)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
3x16c8-minmax-neon-mull-padal.c
67 …int32x4_t vacc0x12 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local
83 int32x4_t vacc1x12 = vacc0x12; in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
99 int32x4_t vacc2x12 = vacc0x12; in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
216 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
254 const int32x4_t vsum0xCD = vpaddq_s32(vacc0x12, vacc0x13); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
306 const int32x2_t vpsum0xC = vadd_s32(vget_low_s32(vacc0x12), vget_high_s32(vacc0x12)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
2x16c16-minmax-neon-mlal-padal.c
63 …int32x4_t vacc0x12 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
79 int32x4_t vacc1x12 = vacc0x12; in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
195 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
229 const int32x4_t vsum0xCD = vpaddq_s32(vacc0x12, vacc0x13); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
269 const int32x2_t vpsum0xC = vadd_s32(vget_low_s32(vacc0x12), vget_high_s32(vacc0x12)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
4x16c8-minmax-neon-mull-padal.c
71 …int32x4_t vacc0x12 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local
87 int32x4_t vacc1x12 = vacc0x12; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
103 int32x4_t vacc2x12 = vacc0x12; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
119 int32x4_t vacc3x12 = vacc0x12; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
266 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
311 const int32x4_t vsum0xCD = vpaddq_s32(vacc0x12, vacc0x13); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
375 const int32x2_t vpsum0xC = vadd_s32(vget_low_s32(vacc0x12), vget_high_s32(vacc0x12)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
3x16c16-minmax-neon-mlal-padal.c
67 …int32x4_t vacc0x12 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
83 int32x4_t vacc1x12 = vacc0x12; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
99 int32x4_t vacc2x12 = vacc0x12; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
258 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
302 const int32x4_t vsum0xCD = vpaddq_s32(vacc0x12, vacc0x13); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
354 const int32x2_t vpsum0xC = vadd_s32(vget_low_s32(vacc0x12), vget_high_s32(vacc0x12)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
3x16c8-minmax-neon-mlal-padal.c
67 …int32x4_t vacc0x12 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
83 int32x4_t vacc1x12 = vacc0x12; in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
99 int32x4_t vacc2x12 = vacc0x12; in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
274 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
405 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
443 const int32x4_t vsum0xCD = vpaddq_s32(vacc0x12, vacc0x13); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
495 const int32x2_t vpsum0xC = vadd_s32(vget_low_s32(vacc0x12), vget_high_s32(vacc0x12)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
4x16c16-minmax-neon-mlal-padal.c
71 …int32x4_t vacc0x12 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
87 int32x4_t vacc1x12 = vacc0x12; in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
103 int32x4_t vacc2x12 = vacc0x12; in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
119 int32x4_t vacc3x12 = vacc0x12; in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
321 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
375 const int32x4_t vsum0xCD = vpaddq_s32(vacc0x12, vacc0x13); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
439 const int32x2_t vpsum0xC = vadd_s32(vget_low_s32(vacc0x12), vget_high_s32(vacc0x12)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
4x16c8-minmax-neon-mlal-padal.c
71 …int32x4_t vacc0x12 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size… in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
87 int32x4_t vacc1x12 = vacc0x12; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
103 int32x4_t vacc2x12 = vacc0x12; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
119 int32x4_t vacc3x12 = vacc0x12; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
338 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
505 vacc0x12 = vpadalq_s16(vacc0x12, vprod0x12); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
550 const int32x4_t vsum0xCD = vpaddq_s32(vacc0x12, vacc0x13); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
614 const int32x2_t vpsum0xC = vadd_s32(vget_low_s32(vacc0x12), vget_high_s32(vacc0x12)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
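
The vsum0xCD lines above are the AArch64 half of the final reduction: adjacent per-channel accumulators are pairwise-added until each lane holds one channel's full sum (the vadd_s32(vget_low_s32(...), vget_high_s32(...)) lines are the ARMv7 equivalent, working on 64-bit halves). A minimal sketch of that fold, assuming AArch64; reduce4 is an illustrative helper, not XNNPACK API.

#include <arm_neon.h>

/* Fold four per-channel accumulators (e.g. vacc0x12..vacc0x15, output
 * channels 0xC..0xF) into one int32x4 of channel sums. Hypothetical
 * helper, not from XNNPACK. */
static int32x4_t reduce4(int32x4_t vaccC, int32x4_t vaccD,
                         int32x4_t vaccE, int32x4_t vaccF) {
  /* vpaddq_s32 pairwise-adds adjacent lanes of its two operands, so two
   * rounds leave lane i holding the full horizontal sum of input i. */
  const int32x4_t vsumCD = vpaddq_s32(vaccC, vaccD);  /* the vsum0xCD step */
  const int32x4_t vsumEF = vpaddq_s32(vaccE, vaccF);
  return vpaddq_s32(vsumCD, vsumEF);  /* {sumC, sumD, sumE, sumF} */
}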