Home
last modified time | relevance | path

Searched refs:va2c1x0 (Results 1 – 25 of 56) sorted by relevance

123

/external/XNNPACK/src/qs8-igemm/gen/
D3x16c4-minmax-rndnu-neon-mlal-ld1r.c220 const int8x8_t va2c1x0 = vreinterpret_s8_s32(va21x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() local
225 int16x8_t vprod2x01c1 = vmull_s8(vb01c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
235 int16x8_t vprod2x23c1 = vmull_s8(vb23c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
245 int16x8_t vprod2x45c1 = vmull_s8(vb45c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
255 int16x8_t vprod2x67c1 = vmull_s8(vb67c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
265 int16x8_t vprod2x89c1 = vmull_s8(vb89c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
275 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
285 int16x8_t vprod2xCDc1 = vmull_s8(vbCDc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
295 int16x8_t vprod2xEFc1 = vmull_s8(vbEFc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
D3x16c4-minmax-rndnu-neon-mlal-ld2r.c214 const int8x8_t va2c1x0 = vreinterpret_s8_s32(va2x0.val[1]); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() local
219 int16x8_t vprod2x01c1 = vmull_s8(vb01c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r()
229 int16x8_t vprod2x23c1 = vmull_s8(vb23c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r()
239 int16x8_t vprod2x45c1 = vmull_s8(vb45c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r()
249 int16x8_t vprod2x67c1 = vmull_s8(vb67c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r()
259 int16x8_t vprod2x89c1 = vmull_s8(vb89c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r()
269 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r()
279 int16x8_t vprod2xCDc1 = vmull_s8(vbCDc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r()
289 int16x8_t vprod2xEFc1 = vmull_s8(vbEFc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r()
D3x16c4-minmax-rndnu-neon-mlal-dup.c214 const int8x8_t va2c1x0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va2x0), 1)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() local
219 int16x8_t vprod2x01c1 = vmull_s8(vb01c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup()
229 int16x8_t vprod2x23c1 = vmull_s8(vb23c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup()
239 int16x8_t vprod2x45c1 = vmull_s8(vb45c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup()
249 int16x8_t vprod2x67c1 = vmull_s8(vb67c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup()
259 int16x8_t vprod2x89c1 = vmull_s8(vb89c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup()
269 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup()
279 int16x8_t vprod2xCDc1 = vmull_s8(vbCDc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup()
289 int16x8_t vprod2xEFc1 = vmull_s8(vbEFc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup()
D3x8c4-minmax-rndnu-neon-mlal-dup.c154 const int8x8_t va2c1x0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va2x0), 1)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup() local
159 int16x8_t vprod2x01c1 = vmull_s8(vb01c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup()
169 int16x8_t vprod2x23c1 = vmull_s8(vb23c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup()
179 int16x8_t vprod2x45c1 = vmull_s8(vb45c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup()
189 int16x8_t vprod2x67c1 = vmull_s8(vb67c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup()
D3x8c4-minmax-rndnu-neon-mlal-ld1r.c160 const int8x8_t va2c1x0 = vreinterpret_s8_s32(va21x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r() local
165 int16x8_t vprod2x01c1 = vmull_s8(vb01c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r()
175 int16x8_t vprod2x23c1 = vmull_s8(vb23c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r()
185 int16x8_t vprod2x45c1 = vmull_s8(vb45c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r()
195 int16x8_t vprod2x67c1 = vmull_s8(vb67c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r()
D3x8c4-minmax-rndnu-neon-mlal-ld2r.c154 const int8x8_t va2c1x0 = vreinterpret_s8_s32(va2x0.val[1]); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r() local
159 int16x8_t vprod2x01c1 = vmull_s8(vb01c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r()
169 int16x8_t vprod2x23c1 = vmull_s8(vb23c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r()
179 int16x8_t vprod2x45c1 = vmull_s8(vb45c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r()
189 int16x8_t vprod2x67c1 = vmull_s8(vb67c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r()
D4x16c4-minmax-rndnu-neon-mlal-ld1r.c266 const int8x8_t va2c1x0 = vreinterpret_s8_s32(va21x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local
273 int16x8_t vprod2x01c1 = vmull_s8(vb01c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
286 int16x8_t vprod2x23c1 = vmull_s8(vb23c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
299 int16x8_t vprod2x45c1 = vmull_s8(vb45c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
312 int16x8_t vprod2x67c1 = vmull_s8(vb67c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
325 int16x8_t vprod2x89c1 = vmull_s8(vb89c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
338 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
351 int16x8_t vprod2xCDc1 = vmull_s8(vbCDc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
364 int16x8_t vprod2xEFc1 = vmull_s8(vbEFc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
D4x16c4-minmax-rndnu-neon-mlal-dup.c258 const int8x8_t va2c1x0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va2x0), 1)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local
265 int16x8_t vprod2x01c1 = vmull_s8(vb01c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
278 int16x8_t vprod2x23c1 = vmull_s8(vb23c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
291 int16x8_t vprod2x45c1 = vmull_s8(vb45c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
304 int16x8_t vprod2x67c1 = vmull_s8(vb67c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
317 int16x8_t vprod2x89c1 = vmull_s8(vb89c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
330 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
343 int16x8_t vprod2xCDc1 = vmull_s8(vbCDc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
356 int16x8_t vprod2xEFc1 = vmull_s8(vbEFc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
D4x16c4-minmax-rndnu-neon-mlal-ld2r.c258 const int8x8_t va2c1x0 = vreinterpret_s8_s32(va2x0.val[1]); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local
265 int16x8_t vprod2x01c1 = vmull_s8(vb01c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
278 int16x8_t vprod2x23c1 = vmull_s8(vb23c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
291 int16x8_t vprod2x45c1 = vmull_s8(vb45c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
304 int16x8_t vprod2x67c1 = vmull_s8(vb67c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
317 int16x8_t vprod2x89c1 = vmull_s8(vb89c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
330 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
343 int16x8_t vprod2xCDc1 = vmull_s8(vbCDc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
356 int16x8_t vprod2xEFc1 = vmull_s8(vbEFc1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
D4x8c4-minmax-rndnu-neon-mlal-ld1r.c190 const int8x8_t va2c1x0 = vreinterpret_s8_s32(va21x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local
197 int16x8_t vprod2x01c1 = vmull_s8(vb01c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
210 int16x8_t vprod2x23c1 = vmull_s8(vb23c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
223 int16x8_t vprod2x45c1 = vmull_s8(vb45c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
236 int16x8_t vprod2x67c1 = vmull_s8(vb67c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
D4x8c4-minmax-rndnu-neon-mlal-dup.c182 const int8x8_t va2c1x0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va2x0), 1)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local
189 int16x8_t vprod2x01c1 = vmull_s8(vb01c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
202 int16x8_t vprod2x23c1 = vmull_s8(vb23c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
215 int16x8_t vprod2x45c1 = vmull_s8(vb45c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
228 int16x8_t vprod2x67c1 = vmull_s8(vb67c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
D4x8c4-minmax-rndnu-neon-mlal-ld2r.c182 const int8x8_t va2c1x0 = vreinterpret_s8_s32(va2x0.val[1]); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local
189 int16x8_t vprod2x01c1 = vmull_s8(vb01c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
202 int16x8_t vprod2x23c1 = vmull_s8(vb23c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
215 int16x8_t vprod2x45c1 = vmull_s8(vb45c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
228 int16x8_t vprod2x67c1 = vmull_s8(vb67c1x0, va2c1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
/external/XNNPACK/src/qs8-gemm/gen/
D3x16c4-minmax-rndnu-neon-mlal-ld2r.c199 const int8x8_t va2c1x0 = vreinterpret_s8_s32(va2x0.val[1]); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r() local
204 int16x8_t vprod2x01c1 = vmull_s8(vb01c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r()
214 int16x8_t vprod2x23c1 = vmull_s8(vb23c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r()
224 int16x8_t vprod2x45c1 = vmull_s8(vb45c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r()
234 int16x8_t vprod2x67c1 = vmull_s8(vb67c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r()
244 int16x8_t vprod2x89c1 = vmull_s8(vb89c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r()
254 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r()
264 int16x8_t vprod2xCDc1 = vmull_s8(vbCDc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r()
274 int16x8_t vprod2xEFc1 = vmull_s8(vbEFc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r()
D3x16c4-minmax-rndnu-neon-mlal-dup.c199 const int8x8_t va2c1x0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va2x0), 1)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup() local
204 int16x8_t vprod2x01c1 = vmull_s8(vb01c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup()
214 int16x8_t vprod2x23c1 = vmull_s8(vb23c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup()
224 int16x8_t vprod2x45c1 = vmull_s8(vb45c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup()
234 int16x8_t vprod2x67c1 = vmull_s8(vb67c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup()
244 int16x8_t vprod2x89c1 = vmull_s8(vb89c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup()
254 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup()
264 int16x8_t vprod2xCDc1 = vmull_s8(vbCDc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup()
274 int16x8_t vprod2xEFc1 = vmull_s8(vbEFc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup()
D3x16c4-minmax-rndnu-neon-mlal-ld1r.c205 const int8x8_t va2c1x0 = vreinterpret_s8_s32(va21x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r() local
210 int16x8_t vprod2x01c1 = vmull_s8(vb01c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
220 int16x8_t vprod2x23c1 = vmull_s8(vb23c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
230 int16x8_t vprod2x45c1 = vmull_s8(vb45c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
240 int16x8_t vprod2x67c1 = vmull_s8(vb67c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
250 int16x8_t vprod2x89c1 = vmull_s8(vb89c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
260 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
270 int16x8_t vprod2xCDc1 = vmull_s8(vbCDc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
280 int16x8_t vprod2xEFc1 = vmull_s8(vbEFc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r()
D3x8c4-minmax-rndnu-neon-mlal-ld2r.c139 const int8x8_t va2c1x0 = vreinterpret_s8_s32(va2x0.val[1]); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r() local
144 int16x8_t vprod2x01c1 = vmull_s8(vb01c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r()
154 int16x8_t vprod2x23c1 = vmull_s8(vb23c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r()
164 int16x8_t vprod2x45c1 = vmull_s8(vb45c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r()
174 int16x8_t vprod2x67c1 = vmull_s8(vb67c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r()
D3x8c4-minmax-rndnu-neon-mlal-ld1r.c145 const int8x8_t va2c1x0 = vreinterpret_s8_s32(va21x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r() local
150 int16x8_t vprod2x01c1 = vmull_s8(vb01c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r()
160 int16x8_t vprod2x23c1 = vmull_s8(vb23c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r()
170 int16x8_t vprod2x45c1 = vmull_s8(vb45c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r()
180 int16x8_t vprod2x67c1 = vmull_s8(vb67c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r()
D3x8c4-minmax-rndnu-neon-mlal-dup.c139 const int8x8_t va2c1x0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va2x0), 1)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup() local
144 int16x8_t vprod2x01c1 = vmull_s8(vb01c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup()
154 int16x8_t vprod2x23c1 = vmull_s8(vb23c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup()
164 int16x8_t vprod2x45c1 = vmull_s8(vb45c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup()
174 int16x8_t vprod2x67c1 = vmull_s8(vb67c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup()
D4x16c4-minmax-rndnu-neon-mlal-dup.c241 const int8x8_t va2c1x0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va2x0), 1)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local
248 int16x8_t vprod2x01c1 = vmull_s8(vb01c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
261 int16x8_t vprod2x23c1 = vmull_s8(vb23c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
274 int16x8_t vprod2x45c1 = vmull_s8(vb45c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
287 int16x8_t vprod2x67c1 = vmull_s8(vb67c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
300 int16x8_t vprod2x89c1 = vmull_s8(vb89c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
313 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
326 int16x8_t vprod2xCDc1 = vmull_s8(vbCDc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
339 int16x8_t vprod2xEFc1 = vmull_s8(vbEFc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
D4x16c4-minmax-rndnu-neon-mlal-ld1r.c249 const int8x8_t va2c1x0 = vreinterpret_s8_s32(va21x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local
256 int16x8_t vprod2x01c1 = vmull_s8(vb01c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
269 int16x8_t vprod2x23c1 = vmull_s8(vb23c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
282 int16x8_t vprod2x45c1 = vmull_s8(vb45c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
295 int16x8_t vprod2x67c1 = vmull_s8(vb67c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
308 int16x8_t vprod2x89c1 = vmull_s8(vb89c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
321 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
334 int16x8_t vprod2xCDc1 = vmull_s8(vbCDc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
347 int16x8_t vprod2xEFc1 = vmull_s8(vbEFc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
D4x16c4-minmax-rndnu-neon-mlal-ld2r.c241 const int8x8_t va2c1x0 = vreinterpret_s8_s32(va2x0.val[1]); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local
248 int16x8_t vprod2x01c1 = vmull_s8(vb01c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
261 int16x8_t vprod2x23c1 = vmull_s8(vb23c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
274 int16x8_t vprod2x45c1 = vmull_s8(vb45c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
287 int16x8_t vprod2x67c1 = vmull_s8(vb67c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
300 int16x8_t vprod2x89c1 = vmull_s8(vb89c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
313 int16x8_t vprod2xABc1 = vmull_s8(vbABc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
326 int16x8_t vprod2xCDc1 = vmull_s8(vbCDc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
339 int16x8_t vprod2xEFc1 = vmull_s8(vbEFc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
D4x8c4-minmax-rndnu-neon-mlal-ld2r.c165 const int8x8_t va2c1x0 = vreinterpret_s8_s32(va2x0.val[1]); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local
172 int16x8_t vprod2x01c1 = vmull_s8(vb01c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
185 int16x8_t vprod2x23c1 = vmull_s8(vb23c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
198 int16x8_t vprod2x45c1 = vmull_s8(vb45c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
211 int16x8_t vprod2x67c1 = vmull_s8(vb67c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
D4x8c4-minmax-rndnu-neon-mlal-dup.c165 const int8x8_t va2c1x0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va2x0), 1)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local
172 int16x8_t vprod2x01c1 = vmull_s8(vb01c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
185 int16x8_t vprod2x23c1 = vmull_s8(vb23c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
198 int16x8_t vprod2x45c1 = vmull_s8(vb45c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
211 int16x8_t vprod2x67c1 = vmull_s8(vb67c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
D4x8c4-minmax-rndnu-neon-mlal-ld1r.c173 const int8x8_t va2c1x0 = vreinterpret_s8_s32(va21x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local
180 int16x8_t vprod2x01c1 = vmull_s8(vb01c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
193 int16x8_t vprod2x23c1 = vmull_s8(vb23c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
206 int16x8_t vprod2x45c1 = vmull_s8(vb45c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
219 int16x8_t vprod2x67c1 = vmull_s8(vb67c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
D3x16c2-minmax-rndnu-neon-mlal-dup.c145 const int8x8_t va2c1x0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va2x0), 1)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup() local
150 int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup()
160 int16x8_t vprod2x4567c1 = vmull_s8(vb4567c1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup()
170 int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup()
180 int16x8_t vprod2xCDEFc1 = vmull_s8(vbCDEFc1x0, va2c1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup()

123