Home
last modified time | relevance | path

Searched refs:vacc0x8 (Results 1 – 24 of 24) sorted by relevance

/external/XNNPACK/src/qs8-gemm/gen/
D1x16c8-minmax-neon-mull-padal.c52 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal() local
93 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
124 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
147 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
D1x16c8-minmax-neon-mlal-padal.c52 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
119 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
182 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
213 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
236 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
D1x16c16-minmax-neon-mlal-padal.c52 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() local
109 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
140 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
163 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
D2x16c8-minmax-neon-mull-padal.c58 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local
74 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
133 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
179 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
214 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
D2x16c8-minmax-neon-mlal-padal.c58 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
74 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
169 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
272 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
318 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
353 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
D2x16c16-minmax-neon-mlal-padal.c58 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
74 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
158 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
211 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
246 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
D3x16c8-minmax-neon-mull-padal.c64 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local
80 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
96 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
173 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
234 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
281 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
D3x16c8-minmax-neon-mlal-padal.c64 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
80 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
96 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
219 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
362 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
423 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
470 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
D4x16c8-minmax-neon-mull-padal.c70 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local
86 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
102 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
118 int32x4_t vacc3x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
213 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
289 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
348 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
D3x16c16-minmax-neon-mlal-padal.c64 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
80 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
96 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
207 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
282 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
329 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
D4x16c16-minmax-neon-mlal-padal.c70 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
86 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
102 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
118 int32x4_t vacc3x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
256 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
353 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
412 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
D4x16c8-minmax-neon-mlal-padal.c70 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
86 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
102 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
118 int32x4_t vacc3x8 = vacc0x8; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
269 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
452 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
528 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
587 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
/external/XNNPACK/src/qs8-igemm/gen/
D1x16c8-minmax-neon-mull-padal.c55 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal() local
104 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal()
138 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal()
161 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal()
D1x16c8-minmax-neon-mlal-padal.c55 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
130 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
193 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
227 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
250 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
D1x16c16-minmax-neon-mlal-padal.c55 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() local
120 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
154 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
177 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
D2x16c8-minmax-neon-mull-padal.c59 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local
75 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
146 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
195 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
230 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
D2x16c8-minmax-neon-mlal-padal.c59 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
75 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
182 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
285 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
334 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
369 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
D3x16c8-minmax-neon-mull-padal.c63 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local
79 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
95 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
188 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
252 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
299 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
D2x16c16-minmax-neon-mlal-padal.c59 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
75 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
171 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
227 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
262 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
D4x16c8-minmax-neon-mull-padal.c67 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local
83 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
99 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
115 int32x4_t vacc3x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
230 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
309 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
368 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
D3x16c16-minmax-neon-mlal-padal.c63 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
79 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
95 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
222 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
300 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
347 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
D3x16c8-minmax-neon-mlal-padal.c63 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
79 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
95 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
234 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
377 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
441 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
488 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
D4x16c16-minmax-neon-mlal-padal.c67 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
83 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
99 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
115 int32x4_t vacc3x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
273 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
373 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
432 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
D4x16c8-minmax-neon-mlal-padal.c67 …int32x4_t vacc0x8 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
83 int32x4_t vacc1x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
99 int32x4_t vacc2x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
115 int32x4_t vacc3x8 = vacc0x8; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
286 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
469 vacc0x8 = vpadalq_s16(vacc0x8, vprod0x8); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
548 const int32x4_t vsum0x89 = vpaddq_s32(vacc0x8, vacc0x9); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
607 const int32x2_t vpsum0x8 = vadd_s32(vget_low_s32(vacc0x8), vget_high_s32(vacc0x8)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()