Home
last modified time | relevance | path

Searched refs:va0x0 (Results 1 – 25 of 276) sorted by relevance

Pages: 1 2 3 4 5 6 7 8 9 10 >> … 12

/external/XNNPACK/src/qs8-gemm/gen/
D1x8c2s4-minmax-fp32-neon-mlal.c49 int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal() local
61 int16x8_t vprod0x0123c0 = vmull_s8(vb0123c0x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
65 int16x8_t vprod0x4567c0 = vmull_s8(vb4567c0x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
69 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
71 int16x8_t vprod0x0123c1 = vmull_s8(vb0123c1x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
75 int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
79 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
81 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
85 int16x8_t vprod0x4567c2 = vmull_s8(vb4567c2x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
89 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
[all …]
D1x8c2s4-minmax-fp32-neonv8-mlal.c50 int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal() local
62 int16x8_t vprod0x0123c0 = vmull_s8(vb0123c0x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
66 int16x8_t vprod0x4567c0 = vmull_s8(vb4567c0x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
70 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
72 int16x8_t vprod0x0123c1 = vmull_s8(vb0123c1x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
76 int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
80 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
82 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
86 int16x8_t vprod0x4567c2 = vmull_s8(vb4567c2x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
90 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
[all …]
D1x8c2s4-minmax-rndnu-neon-mlal.c49 int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal() local
61 int16x8_t vprod0x0123c0 = vmull_s8(vb0123c0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal()
65 int16x8_t vprod0x4567c0 = vmull_s8(vb4567c0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal()
69 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal()
71 int16x8_t vprod0x0123c1 = vmull_s8(vb0123c1x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal()
75 int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal()
79 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal()
81 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal()
85 int16x8_t vprod0x4567c2 = vmull_s8(vb4567c2x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal()
89 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal()
[all …]
D1x16c2s4-minmax-rndnu-neon-mlal.c51 int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal() local
71 int16x8_t vprod0x0123c0 = vmull_s8(vb0123c0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal()
75 int16x8_t vprod0x4567c0 = vmull_s8(vb4567c0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal()
79 int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal()
83 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal()
87 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal()
89 int16x8_t vprod0x0123c1 = vmull_s8(vb0123c1x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal()
93 int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal()
97 int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal()
101 int16x8_t vprod0xCDEFc1 = vmull_s8(vbCDEFc1x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal()
[all …]
D1x16c2s4-minmax-rndnu-neon-mull.c51 int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull() local
70 int16x8_t vprod0x0123c0 = vmull_s8(vb0123c0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull()
72 int16x8_t vprod0x4567c0 = vmull_s8(vb4567c0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull()
74 int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull()
76 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull()
78 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull()
79 int16x8_t vprod0x0123c1 = vmull_s8(vb0123c1x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull()
81 int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull()
83 int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull()
85 int16x8_t vprod0xCDEFc1 = vmull_s8(vbCDEFc1x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull()
[all …]
D1x16c4s2-minmax-rndnu-neon-mlal.c55 int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal() local
75 int16x8_t vprod0x01c0 = vmull_s8(vb01c0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal()
79 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal()
83 int16x8_t vprod0x45c0 = vmull_s8(vb45c0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal()
87 int16x8_t vprod0x67c0 = vmull_s8(vb67c0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal()
91 int16x8_t vprod0x89c0 = vmull_s8(vb89c0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal()
95 int16x8_t vprod0xABc0 = vmull_s8(vbABc0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal()
99 int16x8_t vprod0xCDc0 = vmull_s8(vbCDc0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal()
103 int16x8_t vprod0xEFc0 = vmull_s8(vbEFc0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal()
107 va0x0 = vext_s8(va0x0, va0x0, 4); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal()
[all …]
D2x8c2s4-minmax-rndnu-neon-mlal.c57 int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal() local
71 int16x8_t vprod0x0123c0 = vmull_s8(vb0123c0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal()
78 int16x8_t vprod0x4567c0 = vmull_s8(vb4567c0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal()
85 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal()
89 int16x8_t vprod0x0123c1 = vmull_s8(vb0123c1x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal()
96 int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal()
103 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal()
107 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal()
114 int16x8_t vprod0x4567c2 = vmull_s8(vb4567c2x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal()
121 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal()
[all …]
D2x8c2s4-minmax-fp32-neonv8-mlal.c58 int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal() local
72 int16x8_t vprod0x0123c0 = vmull_s8(vb0123c0x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
79 int16x8_t vprod0x4567c0 = vmull_s8(vb4567c0x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
86 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
90 int16x8_t vprod0x0123c1 = vmull_s8(vb0123c1x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
97 int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
104 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
108 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
115 int16x8_t vprod0x4567c2 = vmull_s8(vb4567c2x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
122 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
[all …]
D2x8c2s4-minmax-fp32-neon-mlal.c57 int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal() local
71 int16x8_t vprod0x0123c0 = vmull_s8(vb0123c0x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
78 int16x8_t vprod0x4567c0 = vmull_s8(vb4567c0x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
85 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
89 int16x8_t vprod0x0123c1 = vmull_s8(vb0123c1x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
96 int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
103 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
107 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
114 int16x8_t vprod0x4567c2 = vmull_s8(vb4567c2x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
121 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
[all …]
/external/XNNPACK/src/qs8-igemm/gen/
D1x8c2s4-minmax-fp32-neonv8-mlal.c61 int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal() local
73 int16x8_t vprod0x0123c0 = vmull_s8(vb0123c0x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
77 int16x8_t vprod0x4567c0 = vmull_s8(vb4567c0x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
81 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
83 int16x8_t vprod0x0123c1 = vmull_s8(vb0123c1x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
87 int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
91 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
93 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
97 int16x8_t vprod0x4567c2 = vmull_s8(vb4567c2x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
101 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
[all …]
D1x8c2s4-minmax-rndnu-neon-mlal.c60 int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal() local
72 int16x8_t vprod0x0123c0 = vmull_s8(vb0123c0x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal()
76 int16x8_t vprod0x4567c0 = vmull_s8(vb4567c0x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal()
80 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal()
82 int16x8_t vprod0x0123c1 = vmull_s8(vb0123c1x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal()
86 int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal()
90 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal()
92 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal()
96 int16x8_t vprod0x4567c2 = vmull_s8(vb4567c2x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal()
100 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal()
[all …]
D1x8c2s4-minmax-fp32-neon-mlal.c60 int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal() local
72 int16x8_t vprod0x0123c0 = vmull_s8(vb0123c0x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
76 int16x8_t vprod0x4567c0 = vmull_s8(vb4567c0x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
80 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
82 int16x8_t vprod0x0123c1 = vmull_s8(vb0123c1x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
86 int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
90 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
92 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
96 int16x8_t vprod0x4567c2 = vmull_s8(vb4567c2x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
100 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
[all …]
D1x16c2s4-minmax-rndnu-neon-mlal.c62 int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal() local
82 int16x8_t vprod0x0123c0 = vmull_s8(vb0123c0x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal()
86 int16x8_t vprod0x4567c0 = vmull_s8(vb4567c0x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal()
90 int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal()
94 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal()
98 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal()
100 int16x8_t vprod0x0123c1 = vmull_s8(vb0123c1x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal()
104 int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal()
108 int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal()
112 int16x8_t vprod0xCDEFc1 = vmull_s8(vbCDEFc1x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal()
[all …]
D1x16c2s4-minmax-rndnu-neon-mull.c62 int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull() local
81 int16x8_t vprod0x0123c0 = vmull_s8(vb0123c0x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull()
83 int16x8_t vprod0x4567c0 = vmull_s8(vb4567c0x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull()
85 int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull()
87 int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull()
89 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull()
90 int16x8_t vprod0x0123c1 = vmull_s8(vb0123c1x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull()
92 int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull()
94 int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull()
96 int16x8_t vprod0xCDEFc1 = vmull_s8(vbCDEFc1x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull()
[all …]
D2x8c2s4-minmax-fp32-neonv8-mlal.c71 int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal() local
85 int16x8_t vprod0x0123c0 = vmull_s8(vb0123c0x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
92 int16x8_t vprod0x4567c0 = vmull_s8(vb4567c0x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
99 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
103 int16x8_t vprod0x0123c1 = vmull_s8(vb0123c1x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
110 int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
117 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
121 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
128 int16x8_t vprod0x4567c2 = vmull_s8(vb4567c2x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
135 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
[all …]
D2x8c2s4-minmax-rndnu-neon-mlal.c70 int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal() local
84 int16x8_t vprod0x0123c0 = vmull_s8(vb0123c0x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal()
91 int16x8_t vprod0x4567c0 = vmull_s8(vb4567c0x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal()
98 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal()
102 int16x8_t vprod0x0123c1 = vmull_s8(vb0123c1x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal()
109 int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal()
116 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal()
120 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal()
127 int16x8_t vprod0x4567c2 = vmull_s8(vb4567c2x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal()
134 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal()
[all …]
D2x8c2s4-minmax-fp32-neon-mlal.c70 int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal() local
84 int16x8_t vprod0x0123c0 = vmull_s8(vb0123c0x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
91 int16x8_t vprod0x4567c0 = vmull_s8(vb4567c0x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
98 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
102 int16x8_t vprod0x0123c1 = vmull_s8(vb0123c1x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
109 int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
116 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
120 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
127 int16x8_t vprod0x4567c2 = vmull_s8(vb4567c2x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
134 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
[all …]
/external/XNNPACK/src/qc8-igemm/gen/
D1x8c2s4-minmax-fp32-neonv8-mlal.c61 int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal() local
73 int16x8_t vprod0x0123c0 = vmull_s8(vb0123c0x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
77 int16x8_t vprod0x4567c0 = vmull_s8(vb4567c0x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
81 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
83 int16x8_t vprod0x0123c1 = vmull_s8(vb0123c1x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
87 int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
91 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
93 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
97 int16x8_t vprod0x4567c2 = vmull_s8(vb4567c2x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
101 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
[all …]
D1x8c2s4-minmax-fp32-neon-mlal.c60 int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal() local
72 int16x8_t vprod0x0123c0 = vmull_s8(vb0123c0x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
76 int16x8_t vprod0x4567c0 = vmull_s8(vb4567c0x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
80 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
82 int16x8_t vprod0x0123c1 = vmull_s8(vb0123c1x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
86 int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
90 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
92 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
96 int16x8_t vprod0x4567c2 = vmull_s8(vb4567c2x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
100 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
[all …]
D2x8c2s4-minmax-fp32-neon-mlal.c70 int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal() local
84 int16x8_t vprod0x0123c0 = vmull_s8(vb0123c0x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
91 int16x8_t vprod0x4567c0 = vmull_s8(vb4567c0x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
98 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
102 int16x8_t vprod0x0123c1 = vmull_s8(vb0123c1x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
109 int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
116 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
120 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
127 int16x8_t vprod0x4567c2 = vmull_s8(vb4567c2x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
134 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
[all …]
D2x8c2s4-minmax-fp32-neonv8-mlal.c71 int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal() local
85 int16x8_t vprod0x0123c0 = vmull_s8(vb0123c0x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
92 int16x8_t vprod0x4567c0 = vmull_s8(vb4567c0x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
99 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
103 int16x8_t vprod0x0123c1 = vmull_s8(vb0123c1x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
110 int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
117 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
121 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
128 int16x8_t vprod0x4567c2 = vmull_s8(vb4567c2x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
135 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
[all …]
/external/XNNPACK/src/qc8-gemm/gen/
D1x8c2s4-minmax-fp32-neonv8-mlal.c50 int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal() local
62 int16x8_t vprod0x0123c0 = vmull_s8(vb0123c0x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
66 int16x8_t vprod0x4567c0 = vmull_s8(vb4567c0x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
70 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
72 int16x8_t vprod0x0123c1 = vmull_s8(vb0123c1x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
76 int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
80 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
82 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
86 int16x8_t vprod0x4567c2 = vmull_s8(vb4567c2x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
90 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
[all …]
D1x8c2s4-minmax-fp32-neon-mlal.c49 int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal() local
61 int16x8_t vprod0x0123c0 = vmull_s8(vb0123c0x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
65 int16x8_t vprod0x4567c0 = vmull_s8(vb4567c0x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
69 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
71 int16x8_t vprod0x0123c1 = vmull_s8(vb0123c1x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
75 int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
79 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
81 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
85 int16x8_t vprod0x4567c2 = vmull_s8(vb4567c2x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
89 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
[all …]
D2x8c2s4-minmax-fp32-neon-mlal.c57 int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal() local
71 int16x8_t vprod0x0123c0 = vmull_s8(vb0123c0x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
78 int16x8_t vprod0x4567c0 = vmull_s8(vb4567c0x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
85 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
89 int16x8_t vprod0x0123c1 = vmull_s8(vb0123c1x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
96 int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
103 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
107 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
114 int16x8_t vprod0x4567c2 = vmull_s8(vb4567c2x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
121 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
[all …]
D2x8c2s4-minmax-fp32-neonv8-mlal.c58 int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal() local
72 int16x8_t vprod0x0123c0 = vmull_s8(vb0123c0x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
79 int16x8_t vprod0x4567c0 = vmull_s8(vb4567c0x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
86 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
90 int16x8_t vprod0x0123c1 = vmull_s8(vb0123c1x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
97 int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
104 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
108 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
115 int16x8_t vprod0x4567c2 = vmull_s8(vb4567c2x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
122 va0x0 = vext_s8(va0x0, va0x0, 2); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
[all …]

Pages: 1 2 3 4 5 6 7 8 9 10 >> … 12