Home
last modified time | relevance | path

Searched refs:vsum1x45 (Results 1 – 25 of 36) sorted by relevance

12

/external/XNNPACK/src/qs8-gemm/gen/
D2x8c8-minmax-neon-mull-padal.c125 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal() local
130 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
157 const int32x2_t vsum1x45 = vpadd_s32(vpsum1x4, vpsum1x5); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal() local
159 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
D2x8c16-minmax-neon-mlal-padal.c141 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
146 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
173 const int32x2_t vsum1x45 = vpadd_s32(vpsum1x4, vpsum1x5); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
175 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
D2x8c8-minmax-neon-mlal-padal.c200 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
205 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
232 const int32x2_t vsum1x45 = vpadd_s32(vpsum1x4, vpsum1x5); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
234 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
D3x8c8-minmax-neon-mull-padal.c156 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() local
165 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
194 const int32x2_t vsum1x45 = vpadd_s32(vpsum1x4, vpsum1x5); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() local
196 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
D3x8c16-minmax-neon-mlal-padal.c180 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
189 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
218 const int32x2_t vsum1x45 = vpadd_s32(vpsum1x4, vpsum1x5); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
220 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
D4x8c8-minmax-neon-mull-padal.c187 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() local
200 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
231 const int32x2_t vsum1x45 = vpadd_s32(vpsum1x4, vpsum1x5); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() local
233 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
D2x16c8-minmax-neon-mull-padal.c185 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local
196 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
239 const int32x2_t vsum1x45 = vpadd_s32(vpsum1x4, vpsum1x5); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local
241 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
D3x8c8-minmax-neon-mlal-padal.c257 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
266 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
295 const int32x2_t vsum1x45 = vpadd_s32(vpsum1x4, vpsum1x5); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
297 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D2x16c16-minmax-neon-mlal-padal.c217 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
228 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
271 const int32x2_t vsum1x45 = vpadd_s32(vpsum1x4, vpsum1x5); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
273 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
D4x8c16-minmax-neon-mlal-padal.c219 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
232 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
263 const int32x2_t vsum1x45 = vpadd_s32(vpsum1x4, vpsum1x5); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
265 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
D4x8c8-minmax-neon-mlal-padal.c314 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
327 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
358 const int32x2_t vsum1x45 = vpadd_s32(vpsum1x4, vpsum1x5); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
360 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
D3x16c8-minmax-neon-mull-padal.c240 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local
259 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
306 const int32x2_t vsum1x45 = vpadd_s32(vpsum1x4, vpsum1x5); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local
308 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
D2x16c8-minmax-neon-mlal-padal.c324 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
335 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
378 const int32x2_t vsum1x45 = vpadd_s32(vpsum1x4, vpsum1x5); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
380 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
/external/XNNPACK/src/qs8-igemm/gen/
D2x8c8-minmax-neon-mull-padal.c141 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal() local
146 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal()
173 const int32x2_t vsum1x45 = vpadd_s32(vpsum1x4, vpsum1x5); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal() local
175 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal()
D2x8c16-minmax-neon-mlal-padal.c157 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
162 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
189 const int32x2_t vsum1x45 = vpadd_s32(vpsum1x4, vpsum1x5); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
191 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
D2x8c8-minmax-neon-mlal-padal.c216 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
221 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
248 const int32x2_t vsum1x45 = vpadd_s32(vpsum1x4, vpsum1x5); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
250 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
D3x8c8-minmax-neon-mull-padal.c174 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() local
183 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
212 const int32x2_t vsum1x45 = vpadd_s32(vpsum1x4, vpsum1x5); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() local
214 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
D3x8c16-minmax-neon-mlal-padal.c198 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
207 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
236 const int32x2_t vsum1x45 = vpadd_s32(vpsum1x4, vpsum1x5); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
238 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
D4x8c8-minmax-neon-mull-padal.c207 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() local
220 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
251 const int32x2_t vsum1x45 = vpadd_s32(vpsum1x4, vpsum1x5); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() local
253 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
D2x16c8-minmax-neon-mull-padal.c201 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local
212 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
255 const int32x2_t vsum1x45 = vpadd_s32(vpsum1x4, vpsum1x5); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local
257 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
D3x8c8-minmax-neon-mlal-padal.c275 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
284 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
313 const int32x2_t vsum1x45 = vpadd_s32(vpsum1x4, vpsum1x5); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
315 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D4x8c16-minmax-neon-mlal-padal.c239 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
252 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
283 const int32x2_t vsum1x45 = vpadd_s32(vpsum1x4, vpsum1x5); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
285 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
D2x16c16-minmax-neon-mlal-padal.c233 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
244 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
287 const int32x2_t vsum1x45 = vpadd_s32(vpsum1x4, vpsum1x5); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
289 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
D3x16c8-minmax-neon-mull-padal.c258 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local
277 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
324 const int32x2_t vsum1x45 = vpadd_s32(vpsum1x4, vpsum1x5); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local
326 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
D2x16c8-minmax-neon-mlal-padal.c340 const int32x4_t vsum1x45 = vpaddq_s32(vacc1x4, vacc1x5); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
351 int32x4_t vacc1x4567 = vpaddq_s32(vsum1x45, vsum1x67); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
394 const int32x2_t vsum1x45 = vpadd_s32(vpsum1x4, vpsum1x5); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
396 int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 ); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()

12