Home
last modified time | relevance | path

Searched refs:va3c0 (Results 1 – 25 of 165) sorted by relevance

1234567

/external/XNNPACK/src/qs8-gemm/gen/
D4x16c4-minmax-rndnu-neon-mull-ld2r.c124 const int8x8_t va3c0 = vreinterpret_s8_s32(va3.val[0]); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r() local
129 const int16x8_t vprod3x01c0 = vmull_s8(vb01c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
137 const int16x8_t vprod3x23c0 = vmull_s8(vb23c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
145 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
153 const int16x8_t vprod3x67c0 = vmull_s8(vb67c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
161 const int16x8_t vprod3x89c0 = vmull_s8(vb89c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
169 const int16x8_t vprod3xABc0 = vmull_s8(vbABc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
177 const int16x8_t vprod3xCDc0 = vmull_s8(vbCDc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
185 const int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
329 const int8x8_t va3c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va3), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r() local
[all …]
D4x16c4-minmax-rndnu-neon-mull-dup.c124 const int8x8_t va3c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va3), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup() local
129 const int16x8_t vprod3x01c0 = vmull_s8(vb01c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
137 const int16x8_t vprod3x23c0 = vmull_s8(vb23c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
145 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
153 const int16x8_t vprod3x67c0 = vmull_s8(vb67c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
161 const int16x8_t vprod3x89c0 = vmull_s8(vb89c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
169 const int16x8_t vprod3xABc0 = vmull_s8(vbABc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
177 const int16x8_t vprod3xCDc0 = vmull_s8(vbCDc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
185 const int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
329 const int8x8_t va3c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va3), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup() local
[all …]
D4x16c4-minmax-rndnu-neon-mull-ld1r.c128 const int8x8_t va3c0 = vreinterpret_s8_s32(va30); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r() local
133 const int16x8_t vprod3x01c0 = vmull_s8(vb01c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
141 const int16x8_t vprod3x23c0 = vmull_s8(vb23c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
149 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
157 const int16x8_t vprod3x67c0 = vmull_s8(vb67c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
165 const int16x8_t vprod3x89c0 = vmull_s8(vb89c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
173 const int16x8_t vprod3xABc0 = vmull_s8(vbABc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
181 const int16x8_t vprod3xCDc0 = vmull_s8(vbCDc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
189 const int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
333 const int8x8_t va3c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va3), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r() local
[all …]
D4x8c4-minmax-rndnu-neon-mull-ld2r.c100 const int8x8_t va3c0 = vreinterpret_s8_s32(va3.val[0]); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r() local
105 const int16x8_t vprod3x01c0 = vmull_s8(vb01c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
113 const int16x8_t vprod3x23c0 = vmull_s8(vb23c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
121 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
129 const int16x8_t vprod3x67c0 = vmull_s8(vb67c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
213 const int8x8_t va3c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va3), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r() local
214 const int16x8_t vprod3x01c0 = vmull_s8(vb01c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
216 const int16x8_t vprod3x23c0 = vmull_s8(vb23c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
218 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
220 const int16x8_t vprod3x67c0 = vmull_s8(vb67c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
D4x8c4-minmax-rndnu-neon-mull-dup.c100 const int8x8_t va3c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va3), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup() local
105 const int16x8_t vprod3x01c0 = vmull_s8(vb01c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
113 const int16x8_t vprod3x23c0 = vmull_s8(vb23c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
121 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
129 const int16x8_t vprod3x67c0 = vmull_s8(vb67c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
213 const int8x8_t va3c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va3), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup() local
214 const int16x8_t vprod3x01c0 = vmull_s8(vb01c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
216 const int16x8_t vprod3x23c0 = vmull_s8(vb23c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
218 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
220 const int16x8_t vprod3x67c0 = vmull_s8(vb67c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
D4x8c4-minmax-rndnu-neon-mull-ld1r.c104 const int8x8_t va3c0 = vreinterpret_s8_s32(va30); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r() local
109 const int16x8_t vprod3x01c0 = vmull_s8(vb01c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
117 const int16x8_t vprod3x23c0 = vmull_s8(vb23c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
125 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
133 const int16x8_t vprod3x67c0 = vmull_s8(vb67c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
217 const int8x8_t va3c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va3), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r() local
218 const int16x8_t vprod3x01c0 = vmull_s8(vb01c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
220 const int16x8_t vprod3x23c0 = vmull_s8(vb23c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
222 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
224 const int16x8_t vprod3x67c0 = vmull_s8(vb67c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
D4x16c4-minmax-rndnu-neon-mlal-dup.c380 const int8x8_t va3c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va3), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local
385 const int16x8_t vprod3x01c0 = vmull_s8(vb01c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
393 const int16x8_t vprod3x23c0 = vmull_s8(vb23c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
401 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
409 const int16x8_t vprod3x67c0 = vmull_s8(vb67c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
417 const int16x8_t vprod3x89c0 = vmull_s8(vb89c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
425 const int16x8_t vprod3xABc0 = vmull_s8(vbABc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
433 const int16x8_t vprod3xCDc0 = vmull_s8(vbCDc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
441 const int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
585 const int8x8_t va3c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va3), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local
[all …]
D4x16c4-minmax-rndnu-neon-mlal-ld1r.c392 const int8x8_t va3c0 = vreinterpret_s8_s32(va30); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local
397 const int16x8_t vprod3x01c0 = vmull_s8(vb01c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
405 const int16x8_t vprod3x23c0 = vmull_s8(vb23c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
413 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
421 const int16x8_t vprod3x67c0 = vmull_s8(vb67c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
429 const int16x8_t vprod3x89c0 = vmull_s8(vb89c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
437 const int16x8_t vprod3xABc0 = vmull_s8(vbABc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
445 const int16x8_t vprod3xCDc0 = vmull_s8(vbCDc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
453 const int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
597 const int8x8_t va3c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va3), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local
[all …]
D4x16c4-minmax-rndnu-neon-mlal-ld2r.c380 const int8x8_t va3c0 = vreinterpret_s8_s32(va3.val[0]); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local
385 const int16x8_t vprod3x01c0 = vmull_s8(vb01c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
393 const int16x8_t vprod3x23c0 = vmull_s8(vb23c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
401 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
409 const int16x8_t vprod3x67c0 = vmull_s8(vb67c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
417 const int16x8_t vprod3x89c0 = vmull_s8(vb89c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
425 const int16x8_t vprod3xABc0 = vmull_s8(vbABc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
433 const int16x8_t vprod3xCDc0 = vmull_s8(vbCDc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
441 const int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
585 const int8x8_t va3c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va3), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local
[all …]
D4x8c4-minmax-rndnu-neon-mlal-ld2r.c244 const int8x8_t va3c0 = vreinterpret_s8_s32(va3.val[0]); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local
249 const int16x8_t vprod3x01c0 = vmull_s8(vb01c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
257 const int16x8_t vprod3x23c0 = vmull_s8(vb23c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
265 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
273 const int16x8_t vprod3x67c0 = vmull_s8(vb67c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
357 const int8x8_t va3c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va3), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r() local
358 const int16x8_t vprod3x01c0 = vmull_s8(vb01c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
360 const int16x8_t vprod3x23c0 = vmull_s8(vb23c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
362 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
364 const int16x8_t vprod3x67c0 = vmull_s8(vb67c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r()
D4x16c2-minmax-rndnu-neon-mull-ld2r.c111 const int8x8_t va3c0 = vreinterpret_s8_s16(va30.val[0]); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() local
116 const int16x8_t vprod3x0123c0 = vmull_s8(vb0123c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r()
124 const int16x8_t vprod3x4567c0 = vmull_s8(vb4567c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r()
132 const int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r()
140 const int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r()
298 const int8x8_t va3c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va3), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r() local
299 const int16x8_t vprod3x0123c0 = vmull_s8(vb0123c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r()
301 const int16x8_t vprod3x4567c0 = vmull_s8(vb4567c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r()
303 const int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r()
305 const int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r()
D4x16c2-minmax-rndnu-neon-mull-ld1r.c119 const int8x8_t va3c0 = vreinterpret_s8_s16(va30); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() local
124 const int16x8_t vprod3x0123c0 = vmull_s8(vb0123c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r()
132 const int16x8_t vprod3x4567c0 = vmull_s8(vb4567c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r()
140 const int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r()
148 const int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r()
306 const int8x8_t va3c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va3), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r() local
307 const int16x8_t vprod3x0123c0 = vmull_s8(vb0123c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r()
309 const int16x8_t vprod3x4567c0 = vmull_s8(vb4567c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r()
311 const int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r()
313 const int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r()
D4x16c2-minmax-rndnu-neon-mull-dup.c107 const int8x8_t va3c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va3), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() local
112 const int16x8_t vprod3x0123c0 = vmull_s8(vb0123c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
120 const int16x8_t vprod3x4567c0 = vmull_s8(vb4567c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
128 const int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
136 const int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
294 const int8x8_t va3c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va3), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() local
295 const int16x8_t vprod3x0123c0 = vmull_s8(vb0123c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
297 const int16x8_t vprod3x4567c0 = vmull_s8(vb4567c0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
299 const int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
301 const int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0, va3c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
/external/XNNPACK/src/qs8-igemm/gen/
D4x16c4-minmax-rndnu-neon-mull-ld2r.c141 const int8x8_t va3c0 = vreinterpret_s8_s32(va3.val[0]); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r() local
146 const int16x8_t vprod3x01c0 = vmull_s8(vb01c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
154 const int16x8_t vprod3x23c0 = vmull_s8(vb23c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
162 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
170 const int16x8_t vprod3x67c0 = vmull_s8(vb67c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
178 const int16x8_t vprod3x89c0 = vmull_s8(vb89c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
186 const int16x8_t vprod3xABc0 = vmull_s8(vbABc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
194 const int16x8_t vprod3xCDc0 = vmull_s8(vbCDc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
202 const int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r()
346 const int8x8_t va3c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va3), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r() local
[all …]
D4x16c4-minmax-rndnu-neon-mull-ld1r.c145 const int8x8_t va3c0 = vreinterpret_s8_s32(va30); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r() local
150 const int16x8_t vprod3x01c0 = vmull_s8(vb01c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
158 const int16x8_t vprod3x23c0 = vmull_s8(vb23c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
166 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
174 const int16x8_t vprod3x67c0 = vmull_s8(vb67c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
182 const int16x8_t vprod3x89c0 = vmull_s8(vb89c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
190 const int16x8_t vprod3xABc0 = vmull_s8(vbABc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
198 const int16x8_t vprod3xCDc0 = vmull_s8(vbCDc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
206 const int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r()
350 const int8x8_t va3c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va3), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r() local
[all …]
D4x16c4-minmax-rndnu-neon-mull-dup.c141 const int8x8_t va3c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va3), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup() local
146 const int16x8_t vprod3x01c0 = vmull_s8(vb01c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
154 const int16x8_t vprod3x23c0 = vmull_s8(vb23c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
162 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
170 const int16x8_t vprod3x67c0 = vmull_s8(vb67c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
178 const int16x8_t vprod3x89c0 = vmull_s8(vb89c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
186 const int16x8_t vprod3xABc0 = vmull_s8(vbABc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
194 const int16x8_t vprod3xCDc0 = vmull_s8(vbCDc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
202 const int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup()
346 const int8x8_t va3c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va3), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup() local
[all …]
D4x8c4-minmax-rndnu-neon-mull-ld2r.c117 const int8x8_t va3c0 = vreinterpret_s8_s32(va3.val[0]); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r() local
122 const int16x8_t vprod3x01c0 = vmull_s8(vb01c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
130 const int16x8_t vprod3x23c0 = vmull_s8(vb23c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
138 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
146 const int16x8_t vprod3x67c0 = vmull_s8(vb67c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
230 const int8x8_t va3c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va3), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r() local
231 const int16x8_t vprod3x01c0 = vmull_s8(vb01c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
233 const int16x8_t vprod3x23c0 = vmull_s8(vb23c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
235 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
237 const int16x8_t vprod3x67c0 = vmull_s8(vb67c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r()
D4x8c4-minmax-rndnu-neon-mull-dup.c117 const int8x8_t va3c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va3), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup() local
122 const int16x8_t vprod3x01c0 = vmull_s8(vb01c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
130 const int16x8_t vprod3x23c0 = vmull_s8(vb23c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
138 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
146 const int16x8_t vprod3x67c0 = vmull_s8(vb67c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
230 const int8x8_t va3c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va3), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup() local
231 const int16x8_t vprod3x01c0 = vmull_s8(vb01c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
233 const int16x8_t vprod3x23c0 = vmull_s8(vb23c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
235 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
237 const int16x8_t vprod3x67c0 = vmull_s8(vb67c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup()
D4x8c4-minmax-rndnu-neon-mull-ld1r.c121 const int8x8_t va3c0 = vreinterpret_s8_s32(va30); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r() local
126 const int16x8_t vprod3x01c0 = vmull_s8(vb01c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
134 const int16x8_t vprod3x23c0 = vmull_s8(vb23c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
142 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
150 const int16x8_t vprod3x67c0 = vmull_s8(vb67c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
234 const int8x8_t va3c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va3), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r() local
235 const int16x8_t vprod3x01c0 = vmull_s8(vb01c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
237 const int16x8_t vprod3x23c0 = vmull_s8(vb23c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
239 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
241 const int16x8_t vprod3x67c0 = vmull_s8(vb67c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r()
D4x16c4-minmax-rndnu-neon-mlal-ld1r.c409 const int8x8_t va3c0 = vreinterpret_s8_s32(va30); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local
414 const int16x8_t vprod3x01c0 = vmull_s8(vb01c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
422 const int16x8_t vprod3x23c0 = vmull_s8(vb23c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
430 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
438 const int16x8_t vprod3x67c0 = vmull_s8(vb67c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
446 const int16x8_t vprod3x89c0 = vmull_s8(vb89c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
454 const int16x8_t vprod3xABc0 = vmull_s8(vbABc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
462 const int16x8_t vprod3xCDc0 = vmull_s8(vbCDc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
470 const int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r()
614 const int8x8_t va3c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va3), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r() local
[all …]
D4x16c4-minmax-rndnu-neon-mlal-dup.c397 const int8x8_t va3c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va3), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local
402 const int16x8_t vprod3x01c0 = vmull_s8(vb01c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
410 const int16x8_t vprod3x23c0 = vmull_s8(vb23c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
418 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
426 const int16x8_t vprod3x67c0 = vmull_s8(vb67c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
434 const int16x8_t vprod3x89c0 = vmull_s8(vb89c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
442 const int16x8_t vprod3xABc0 = vmull_s8(vbABc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
450 const int16x8_t vprod3xCDc0 = vmull_s8(vbCDc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
458 const int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup()
602 const int8x8_t va3c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va3), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup() local
[all …]
D4x16c4-minmax-rndnu-neon-mlal-ld2r.c397 const int8x8_t va3c0 = vreinterpret_s8_s32(va3.val[0]); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local
402 const int16x8_t vprod3x01c0 = vmull_s8(vb01c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
410 const int16x8_t vprod3x23c0 = vmull_s8(vb23c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
418 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
426 const int16x8_t vprod3x67c0 = vmull_s8(vb67c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
434 const int16x8_t vprod3x89c0 = vmull_s8(vb89c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
442 const int16x8_t vprod3xABc0 = vmull_s8(vbABc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
450 const int16x8_t vprod3xCDc0 = vmull_s8(vbCDc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
458 const int16x8_t vprod3xEFc0 = vmull_s8(vbEFc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r()
602 const int8x8_t va3c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va3), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r() local
[all …]
D4x16c2-minmax-rndnu-neon-mull-dup.c124 const int8x8_t va3c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va3), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() local
129 const int16x8_t vprod3x0123c0 = vmull_s8(vb0123c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
137 const int16x8_t vprod3x4567c0 = vmull_s8(vb4567c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
145 const int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
153 const int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
311 const int8x8_t va3c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va3), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup() local
312 const int16x8_t vprod3x0123c0 = vmull_s8(vb0123c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
314 const int16x8_t vprod3x4567c0 = vmull_s8(vb4567c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
316 const int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
318 const int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup()
D4x8c4-minmax-rndnu-neon-mlal-ld1r.c273 const int8x8_t va3c0 = vreinterpret_s8_s32(va30); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local
278 const int16x8_t vprod3x01c0 = vmull_s8(vb01c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
286 const int16x8_t vprod3x23c0 = vmull_s8(vb23c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
294 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
302 const int16x8_t vprod3x67c0 = vmull_s8(vb67c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
386 const int8x8_t va3c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va3), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r() local
387 const int16x8_t vprod3x01c0 = vmull_s8(vb01c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
389 const int16x8_t vprod3x23c0 = vmull_s8(vb23c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
391 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
393 const int16x8_t vprod3x67c0 = vmull_s8(vb67c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r()
D4x8c4-minmax-rndnu-neon-mlal-dup.c261 const int8x8_t va3c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va3), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local
266 const int16x8_t vprod3x01c0 = vmull_s8(vb01c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
274 const int16x8_t vprod3x23c0 = vmull_s8(vb23c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
282 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
290 const int16x8_t vprod3x67c0 = vmull_s8(vb67c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
374 const int8x8_t va3c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va3), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup() local
375 const int16x8_t vprod3x01c0 = vmull_s8(vb01c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
377 const int16x8_t vprod3x23c0 = vmull_s8(vb23c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
379 const int16x8_t vprod3x45c0 = vmull_s8(vb45c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()
381 const int16x8_t vprod3x67c0 = vmull_s8(vb67c0, va3c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup()

1234567