Home
last modified time | relevance | path

Searched refs:vsum1x67 (Results 1 – 25 of 36) sorted by relevance

12

/external/XNNPACK/src/qs8-gemm/gen/
D2x8c8-minmax-neon-mull-padal.c126 const int32x4_t vsum1x67 = vpaddq_s32(vacc1x6, vacc1x7); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal() local
130 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
158 const int32x2_t vsum1x67 = vpadd_s32(vpsum1x6, vpsum1x7); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal() local
159 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
D2x8c16-minmax-neon-mlal-padal.c142 const int32x4_t vsum1x67 = vpaddq_s32(vacc1x6, vacc1x7); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
146 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
174 const int32x2_t vsum1x67 = vpadd_s32(vpsum1x6, vpsum1x7); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
175 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
D2x8c8-minmax-neon-mlal-padal.c201 const int32x4_t vsum1x67 = vpaddq_s32(vacc1x6, vacc1x7); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
205 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
233 const int32x2_t vsum1x67 = vpadd_s32(vpsum1x6, vpsum1x7); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
234 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
D3x8c8-minmax-neon-mull-padal.c157 const int32x4_t vsum1x67 = vpaddq_s32(vacc1x6, vacc1x7); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() local
165 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
195 const int32x2_t vsum1x67 = vpadd_s32(vpsum1x6, vpsum1x7); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() local
196 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
D3x8c16-minmax-neon-mlal-padal.c181 const int32x4_t vsum1x67 = vpaddq_s32(vacc1x6, vacc1x7); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
189 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
219 const int32x2_t vsum1x67 = vpadd_s32(vpsum1x6, vpsum1x7); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
220 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
D4x8c8-minmax-neon-mull-padal.c188 const int32x4_t vsum1x67 = vpaddq_s32(vacc1x6, vacc1x7); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() local
200 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
232 const int32x2_t vsum1x67 = vpadd_s32(vpsum1x6, vpsum1x7); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() local
233 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
D2x16c8-minmax-neon-mull-padal.c186 const int32x4_t vsum1x67 = vpaddq_s32(vacc1x6, vacc1x7); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local
196 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
240 const int32x2_t vsum1x67 = vpadd_s32(vpsum1x6, vpsum1x7); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local
241 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
D3x8c8-minmax-neon-mlal-padal.c258 const int32x4_t vsum1x67 = vpaddq_s32(vacc1x6, vacc1x7); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
266 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
296 const int32x2_t vsum1x67 = vpadd_s32(vpsum1x6, vpsum1x7); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
297 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D2x16c16-minmax-neon-mlal-padal.c218 const int32x4_t vsum1x67 = vpaddq_s32(vacc1x6, vacc1x7); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
228 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
272 const int32x2_t vsum1x67 = vpadd_s32(vpsum1x6, vpsum1x7); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
273 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
D4x8c16-minmax-neon-mlal-padal.c220 const int32x4_t vsum1x67 = vpaddq_s32(vacc1x6, vacc1x7); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
232 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
264 const int32x2_t vsum1x67 = vpadd_s32(vpsum1x6, vpsum1x7); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
265 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
D4x8c8-minmax-neon-mlal-padal.c315 const int32x4_t vsum1x67 = vpaddq_s32(vacc1x6, vacc1x7); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
327 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
359 const int32x2_t vsum1x67 = vpadd_s32(vpsum1x6, vpsum1x7); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
360 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
D3x16c8-minmax-neon-mull-padal.c241 const int32x4_t vsum1x67 = vpaddq_s32(vacc1x6, vacc1x7); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local
259 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
307 const int32x2_t vsum1x67 = vpadd_s32(vpsum1x6, vpsum1x7); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local
308 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
D2x16c8-minmax-neon-mlal-padal.c325 const int32x4_t vsum1x67 = vpaddq_s32(vacc1x6, vacc1x7); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
335 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
379 const int32x2_t vsum1x67 = vpadd_s32(vpsum1x6, vpsum1x7); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
380 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
/external/XNNPACK/src/qs8-igemm/gen/
D2x8c8-minmax-neon-mull-padal.c142 const int32x4_t vsum1x67 = vpaddq_s32(vacc1x6, vacc1x7); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal() local
146 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal()
174 const int32x2_t vsum1x67 = vpadd_s32(vpsum1x6, vpsum1x7); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal() local
175 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal()
D2x8c16-minmax-neon-mlal-padal.c158 const int32x4_t vsum1x67 = vpaddq_s32(vacc1x6, vacc1x7); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
162 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
190 const int32x2_t vsum1x67 = vpadd_s32(vpsum1x6, vpsum1x7); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
191 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
D2x8c8-minmax-neon-mlal-padal.c217 const int32x4_t vsum1x67 = vpaddq_s32(vacc1x6, vacc1x7); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
221 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
249 const int32x2_t vsum1x67 = vpadd_s32(vpsum1x6, vpsum1x7); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
250 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
D3x8c8-minmax-neon-mull-padal.c175 const int32x4_t vsum1x67 = vpaddq_s32(vacc1x6, vacc1x7); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() local
183 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
213 const int32x2_t vsum1x67 = vpadd_s32(vpsum1x6, vpsum1x7); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() local
214 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
D3x8c16-minmax-neon-mlal-padal.c199 const int32x4_t vsum1x67 = vpaddq_s32(vacc1x6, vacc1x7); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
207 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
237 const int32x2_t vsum1x67 = vpadd_s32(vpsum1x6, vpsum1x7); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
238 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
D4x8c8-minmax-neon-mull-padal.c208 const int32x4_t vsum1x67 = vpaddq_s32(vacc1x6, vacc1x7); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() local
220 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
252 const int32x2_t vsum1x67 = vpadd_s32(vpsum1x6, vpsum1x7); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() local
253 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
D2x16c8-minmax-neon-mull-padal.c202 const int32x4_t vsum1x67 = vpaddq_s32(vacc1x6, vacc1x7); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local
212 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
256 const int32x2_t vsum1x67 = vpadd_s32(vpsum1x6, vpsum1x7); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local
257 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
D3x8c8-minmax-neon-mlal-padal.c276 const int32x4_t vsum1x67 = vpaddq_s32(vacc1x6, vacc1x7); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
284 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
314 const int32x2_t vsum1x67 = vpadd_s32(vpsum1x6, vpsum1x7); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
315 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D4x8c16-minmax-neon-mlal-padal.c240 const int32x4_t vsum1x67 = vpaddq_s32(vacc1x6, vacc1x7); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
252 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
284 const int32x2_t vsum1x67 = vpadd_s32(vpsum1x6, vpsum1x7); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
285 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
D2x16c16-minmax-neon-mlal-padal.c234 const int32x4_t vsum1x67 = vpaddq_s32(vacc1x6, vacc1x7); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
244 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
288 const int32x2_t vsum1x67 = vpadd_s32(vpsum1x6, vpsum1x7); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
289 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
D3x16c8-minmax-neon-mull-padal.c259 const int32x4_t vsum1x67 = vpaddq_s32(vacc1x6, vacc1x7); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local
277 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
325 const int32x2_t vsum1x67 = vpadd_s32(vpsum1x6, vpsum1x7); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local
326 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
D2x16c8-minmax-neon-mlal-padal.c341 const int32x4_t vsum1x67 = vpaddq_s32(vacc1x6, vacc1x7); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
351 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
395 const int32x2_t vsum1x67 = vpadd_s32(vpsum1x6, vpsum1x7); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
396 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()

12