Searched refs:vacc0x14 (results 1 – 24 of 24, sorted by relevance)
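
All 24 hits are in XNNPACK's generated QS8 (signed 8-bit integer) GEMM and IGEMM microkernels with a 16-column output tile: vacc0x14 is the int32x4_t accumulator for row 0, column 14. Every file shows the same lifecycle, visible directly in the matched lines: the accumulator is initialized with the column bias via vld1q_lane_s32, updated in the inner loop with vpadalq_s16 (pairwise add-and-accumulate of int16 products), and finally reduced to a single int32 per column. Below is a minimal, self-contained sketch of that lifecycle for one column, assembled from the matched lines rather than copied from the XNNPACK source; the inputs, the bias value, and names such as va0 and vb14 are hypothetical, and the vmull_s8 step is inferred from the "mull" in the kernel names.

/*
 * Sketch of the vacc0x14 lifecycle (ARM NEON, plain C).
 * Not the XNNPACK source: inputs and the vmull_s8 step are assumptions.
 */
#include <arm_neon.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
  /* 1. Init: load the column-14 bias into lane 0 of a zeroed vector,
   *    matching the vld1q_lane_s32(w, vmovq_n_s32(0), 0) declarations. */
  const int32_t bias14 = 7;                          /* hypothetical bias */
  int32x4_t vacc0x14 = vld1q_lane_s32(&bias14, vmovq_n_s32(0), 0);

  /* 2. Inner-loop body: widening 8-bit multiply ("mull"), then pairwise
   *    add-and-accumulate the eight int16 products into four int32
   *    lanes ("padal"), matching the vpadalq_s16 hits. */
  const int8_t a0[8]  = {1, 2, 3, 4, 5, 6, 7, 8};    /* activations, row 0 */
  const int8_t b14[8] = {1, 1, 1, 1, 1, 1, 1, 1};    /* weights, column 14 */
  const int8x8_t va0  = vld1_s8(a0);
  const int8x8_t vb14 = vld1_s8(b14);
  const int16x8_t vprod0x14 = vmull_s8(vb14, va0);
  vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14);

  /* 3. Reduction: fold the four partial sums into one int32, mirroring
   *    the vget_low_s32/vget_high_s32 + vadd_s32 hits. */
  const int32x2_t vpsum0xE =
      vadd_s32(vget_low_s32(vacc0x14), vget_high_s32(vacc0x14));
  const int32_t out = vget_lane_s32(vpsum0xE, 0) + vget_lane_s32(vpsum0xE, 1);
  printf("row 0, column 14: %d\n", out);             /* bias 7 + dot 36 = 43 */
  return 0;
}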

/external/XNNPACK/src/qs8-gemm/gen/
1x16c8-minmax-neon-mull-padal.c (in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal):
   58  …int32x4_t vacc0x14 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size…  [local declaration]
  111  vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14);
  127  const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15);
  156  const int32x2_t vpsum0xE = vadd_s32(vget_low_s32(vacc0x14), vget_high_s32(vacc0x14));

1x16c8-minmax-neon-mlal-padal.c (in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal):
   58  …int32x4_t vacc0x14 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size…  [local declaration]
  143  vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14);
  200  vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14);
  216  const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15);
  245  const int32x2_t vpsum0xE = vadd_s32(vget_low_s32(vacc0x14), vget_high_s32(vacc0x14));

1x16c16-minmax-neon-mlal-padal.c (in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal):
   58  …int32x4_t vacc0x14 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size…  [local declaration]
  127  vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14);
  143  const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15);
  172  const int32x2_t vpsum0xE = vadd_s32(vget_low_s32(vacc0x14), vget_high_s32(vacc0x14));

2x16c8-minmax-neon-mull-padal.c (in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal):
   64  …int32x4_t vacc0x14 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size…  [local declaration]
   80  int32x4_t vacc1x14 = vacc0x14;
  163  vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14);
  182  const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15);
  223  const int32x2_t vpsum0xE = vadd_s32(vget_low_s32(vacc0x14), vget_high_s32(vacc0x14));

2x16c8-minmax-neon-mlal-padal.c (in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal):
   64  …int32x4_t vacc0x14 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size…  [local declaration]
   80  int32x4_t vacc1x14 = vacc0x14;
  211  vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14);
  302  vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14);
  321  const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15);
  362  const int32x2_t vpsum0xE = vadd_s32(vget_low_s32(vacc0x14), vget_high_s32(vacc0x14));

2x16c16-minmax-neon-mlal-padal.c (in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal):
   64  …int32x4_t vacc0x14 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size…  [local declaration]
   80  int32x4_t vacc1x14 = vacc0x14;
  194  vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14);
  214  const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15);
  255  const int32x2_t vpsum0xE = vadd_s32(vget_low_s32(vacc0x14), vget_high_s32(vacc0x14));

3x16c8-minmax-neon-mull-padal.c (in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal):
   70  …int32x4_t vacc0x14 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size…  [local declaration]
   86  int32x4_t vacc1x14 = vacc0x14;
  102  int32x4_t vacc2x14 = vacc0x14;
  215  vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14);
  237  const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15);
  290  const int32x2_t vpsum0xE = vadd_s32(vget_low_s32(vacc0x14), vget_high_s32(vacc0x14));

3x16c8-minmax-neon-mlal-padal.c (in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal):
   70  …int32x4_t vacc0x14 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size…  [local declaration]
   86  int32x4_t vacc1x14 = vacc0x14;
  102  int32x4_t vacc2x14 = vacc0x14;
  279  vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14);
  404  vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14);
  426  const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15);
  479  const int32x2_t vpsum0xE = vadd_s32(vget_low_s32(vacc0x14), vget_high_s32(vacc0x14));

4x16c8-minmax-neon-mull-padal.c (in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal):
   76  …int32x4_t vacc0x14 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size…  [local declaration]
   92  int32x4_t vacc1x14 = vacc0x14;
  108  int32x4_t vacc2x14 = vacc0x14;
  124  int32x4_t vacc3x14 = vacc0x14;
  267  vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14);
  292  const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15);
  357  const int32x2_t vpsum0xE = vadd_s32(vget_low_s32(vacc0x14), vget_high_s32(vacc0x14));

3x16c16-minmax-neon-mlal-padal.c (in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal):
   70  …int32x4_t vacc0x14 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size…  [local declaration]
   86  int32x4_t vacc1x14 = vacc0x14;
  102  int32x4_t vacc2x14 = vacc0x14;
  261  vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14);
  285  const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15);
  338  const int32x2_t vpsum0xE = vadd_s32(vget_low_s32(vacc0x14), vget_high_s32(vacc0x14));

4x16c16-minmax-neon-mlal-padal.c (in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal):
   76  …int32x4_t vacc0x14 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size…  [local declaration]
   92  int32x4_t vacc1x14 = vacc0x14;
  108  int32x4_t vacc2x14 = vacc0x14;
  124  int32x4_t vacc3x14 = vacc0x14;
  328  vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14);
  356  const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15);
  421  const int32x2_t vpsum0xE = vadd_s32(vget_low_s32(vacc0x14), vget_high_s32(vacc0x14));

4x16c8-minmax-neon-mlal-padal.c (in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal):
   76  …int32x4_t vacc0x14 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size…  [local declaration]
   92  int32x4_t vacc1x14 = vacc0x14;
  108  int32x4_t vacc2x14 = vacc0x14;
  124  int32x4_t vacc3x14 = vacc0x14;
  347  vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14);
  506  vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14);
  531  const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15);
  596  const int32x2_t vpsum0xE = vadd_s32(vget_low_s32(vacc0x14), vget_high_s32(vacc0x14));
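
The GEMM variants above differ only in how vprod0x14 is formed before the accumulate: the mull kernels use a single widening multiply per update, while the mlal kernels (which each show two vpadalq_s16 hits, far apart, consistent with a doubled main loop plus a mull-only remainder path) chain a second multiply into the same int16 product. A hypothetical sketch of one mlal step follows; the parameter names are invented, since the matches show only the final accumulate line.

#include <arm_neon.h>

/* One assumed "mlal" accumulation step for row 0, column 14: two 8-byte
 * slices of K are multiplied and chained before a single pairwise
 * accumulate, matching the vpadalq_s16 lines in the mlal hits. */
static int32x4_t acc0x14_step_mlal(int32x4_t vacc0x14,
                                   int8x8_t va0x0, int8x8_t va0x1,
                                   int8x8_t vb14x0, int8x8_t vb14x1) {
  int16x8_t vprod0x14 = vmull_s8(vb14x0, va0x0);   /* first 8 bytes of K */
  vprod0x14 = vmlal_s8(vprod0x14, vb14x1, va0x1);  /* next 8 bytes of K  */
  return vpadalq_s16(vacc0x14, vprod0x14);
}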
/external/XNNPACK/src/qs8-igemm/gen/
1x16c8-minmax-neon-mull-padal.c (in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal):
   61  …int32x4_t vacc0x14 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size…  [local declaration]
  122  vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14);
  141  const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15);
  170  const int32x2_t vpsum0xE = vadd_s32(vget_low_s32(vacc0x14), vget_high_s32(vacc0x14));

1x16c8-minmax-neon-mlal-padal.c (in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal):
   61  …int32x4_t vacc0x14 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size…  [local declaration]
  154  vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14);
  211  vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14);
  230  const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15);
  259  const int32x2_t vpsum0xE = vadd_s32(vget_low_s32(vacc0x14), vget_high_s32(vacc0x14));

1x16c16-minmax-neon-mlal-padal.c (in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal):
   61  …int32x4_t vacc0x14 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size…  [local declaration]
  138  vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14);
  157  const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15);
  186  const int32x2_t vpsum0xE = vadd_s32(vget_low_s32(vacc0x14), vget_high_s32(vacc0x14));

2x16c8-minmax-neon-mull-padal.c (in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal):
   65  …int32x4_t vacc0x14 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size…  [local declaration]
   81  int32x4_t vacc1x14 = vacc0x14;
  176  vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14);
  198  const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15);
  239  const int32x2_t vpsum0xE = vadd_s32(vget_low_s32(vacc0x14), vget_high_s32(vacc0x14));

2x16c8-minmax-neon-mlal-padal.c (in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal):
   65  …int32x4_t vacc0x14 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size…  [local declaration]
   81  int32x4_t vacc1x14 = vacc0x14;
  224  vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14);
  315  vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14);
  337  const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15);
  378  const int32x2_t vpsum0xE = vadd_s32(vget_low_s32(vacc0x14), vget_high_s32(vacc0x14));

3x16c8-minmax-neon-mull-padal.c (in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal):
   69  …int32x4_t vacc0x14 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size…  [local declaration]
   85  int32x4_t vacc1x14 = vacc0x14;
  101  int32x4_t vacc2x14 = vacc0x14;
  230  vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14);
  255  const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15);
  308  const int32x2_t vpsum0xE = vadd_s32(vget_low_s32(vacc0x14), vget_high_s32(vacc0x14));

2x16c16-minmax-neon-mlal-padal.c (in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal):
   65  …int32x4_t vacc0x14 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size…  [local declaration]
   81  int32x4_t vacc1x14 = vacc0x14;
  207  vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14);
  230  const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15);
  271  const int32x2_t vpsum0xE = vadd_s32(vget_low_s32(vacc0x14), vget_high_s32(vacc0x14));

4x16c8-minmax-neon-mull-padal.c (in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal):
   73  …int32x4_t vacc0x14 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size…  [local declaration]
   89  int32x4_t vacc1x14 = vacc0x14;
  105  int32x4_t vacc2x14 = vacc0x14;
  121  int32x4_t vacc3x14 = vacc0x14;
  284  vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14);
  312  const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15);
  377  const int32x2_t vpsum0xE = vadd_s32(vget_low_s32(vacc0x14), vget_high_s32(vacc0x14));

3x16c16-minmax-neon-mlal-padal.c (in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal):
   69  …int32x4_t vacc0x14 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size…  [local declaration]
   85  int32x4_t vacc1x14 = vacc0x14;
  101  int32x4_t vacc2x14 = vacc0x14;
  276  vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14);
  303  const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15);
  356  const int32x2_t vpsum0xE = vadd_s32(vget_low_s32(vacc0x14), vget_high_s32(vacc0x14));

3x16c8-minmax-neon-mlal-padal.c (in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal):
   69  …int32x4_t vacc0x14 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size…  [local declaration]
   85  int32x4_t vacc1x14 = vacc0x14;
  101  int32x4_t vacc2x14 = vacc0x14;
  294  vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14);
  419  vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14);
  444  const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15);
  497  const int32x2_t vpsum0xE = vadd_s32(vget_low_s32(vacc0x14), vget_high_s32(vacc0x14));

4x16c16-minmax-neon-mlal-padal.c (in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal):
   73  …int32x4_t vacc0x14 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size…  [local declaration]
   89  int32x4_t vacc1x14 = vacc0x14;
  105  int32x4_t vacc2x14 = vacc0x14;
  121  int32x4_t vacc3x14 = vacc0x14;
  345  vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14);
  376  const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15);
  441  const int32x2_t vpsum0xE = vadd_s32(vget_low_s32(vacc0x14), vget_high_s32(vacc0x14));

4x16c8-minmax-neon-mlal-padal.c (in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal):
   73  …int32x4_t vacc0x14 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + size…  [local declaration]
   89  int32x4_t vacc1x14 = vacc0x14;
  105  int32x4_t vacc2x14 = vacc0x14;
  121  int32x4_t vacc3x14 = vacc0x14;
  364  vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14);
  523  vacc0x14 = vpadalq_s16(vacc0x14, vprod0x14);
  551  const int32x4_t vsum0xEF = vpaddq_s32(vacc0x14, vacc0x15);
  616  const int32x2_t vpsum0xE = vadd_s32(vget_low_s32(vacc0x14), vget_high_s32(vacc0x14));
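
A note on the reductions: every file pairs a vpaddq_s32 hit (combining the column-14 and column-15 accumulators into vsum0xEF) with a vget_low_s32/vget_high_s32 + vadd_s32 hit for the same accumulator (vpsum0xE). Since vpaddq_s32 is an AArch64-only intrinsic, the two hits are most plausibly alternative code paths in the generated source: a 64-bit path that reduces four lanes at a time, and a portable ARMv7 NEON path that halves each accumulator by hand.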