/external/XNNPACK/src/qs8-igemm/gen/

D | 4x8c2s4-minmax-rndnu-neon-mlal.c  (all hits in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal())
    102  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
    155  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
    156  int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1x0);
    157  int16x8_t vprod2x4567c1 = vmull_s8(vb4567c1x0, va2x0);
    158  int16x8_t vprod3x4567c1 = vmull_s8(vb4567c1x0, va3x0);
    248  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
    282  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
    283  int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1x0);
    284  int16x8_t vprod2x4567c1 = vmull_s8(vb4567c1x0, va2x0);
    285  int16x8_t vprod3x4567c1 = vmull_s8(vb4567c1x0, va3x0);

D | 3x8c2s4-minmax-rndnu-neon-mlal.c  (all hits in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal())
     90  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
    132  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
    133  int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1x0);
    134  int16x8_t vprod2x4567c1 = vmull_s8(vb4567c1x0, va2x0);
    205  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
    232  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
    233  int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1x0);
    234  int16x8_t vprod2x4567c1 = vmull_s8(vb4567c1x0, va2x0);

D | 2x8c2s4-minmax-fp32-neonv8-mlal.c  (all hits in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal())
     79  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
    110  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
    111  int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1x0);
    163  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
    183  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
    184  int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1x0);

D | 2x8c2s4-minmax-rndnu-neon-mlal.c  (all hits in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal())
     78  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
    109  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
    110  int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1x0);
    162  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
    182  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
    183  int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1x0);

D | 2x8c2s4-minmax-fp32-neon-mlal.c  (all hits in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal())
     78  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
    109  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
    110  int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1x0);
    162  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
    182  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
    183  int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1x0);

D | 1x8c2s4-minmax-fp32-neonv8-mlal.c  (all hits in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal())
     67  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
     87  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
    120  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
    133  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);

D | 1x8c2s4-minmax-rndnu-neon-mlal.c  (all hits in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal())
     66  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
     86  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
    119  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
    132  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);

D | 1x8c2s4-minmax-fp32-neon-mlal.c  (all hits in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal())
     66  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
     86  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
    119  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
    132  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);

D | 4x8c2s4-minmax-rndnu-neon-mull.c  (all hits in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull())
     98  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
    132  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
    133  int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1x0);
    134  int16x8_t vprod2x4567c1 = vmull_s8(vb4567c1x0, va2x0);
    135  int16x8_t vprod3x4567c1 = vmull_s8(vb4567c1x0, va3x0);

/external/XNNPACK/src/qs8-gemm/gen/

D | 4x8c2s4-minmax-rndnu-neon-mlal.c  (all hits in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal())
     85  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
    138  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
    139  int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1x0);
    140  int16x8_t vprod2x4567c1 = vmull_s8(vb4567c1x0, va2x0);
    141  int16x8_t vprod3x4567c1 = vmull_s8(vb4567c1x0, va3x0);
    231  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
    265  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
    266  int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1x0);
    267  int16x8_t vprod2x4567c1 = vmull_s8(vb4567c1x0, va2x0);
    268  int16x8_t vprod3x4567c1 = vmull_s8(vb4567c1x0, va3x0);

D | 3x8c2s4-minmax-rndnu-neon-mlal.c  (all hits in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal())
     75  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
    117  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
    118  int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1x0);
    119  int16x8_t vprod2x4567c1 = vmull_s8(vb4567c1x0, va2x0);
    190  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
    217  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
    218  int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1x0);
    219  int16x8_t vprod2x4567c1 = vmull_s8(vb4567c1x0, va2x0);

D | 2x8c2s4-minmax-rndnu-neon-mlal.c  (all hits in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal())
     65  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
     96  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
     97  int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1x0);
    149  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
    169  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
    170  int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1x0);

D | 2x8c2s4-minmax-fp32-neonv8-mlal.c  (all hits in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal())
     66  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
     97  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
     98  int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1x0);
    150  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
    170  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
    171  int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1x0);

D | 2x8c2s4-minmax-fp32-neon-mlal.c  (all hits in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal())
     65  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
     96  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
     97  int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1x0);
    149  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
    169  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
    170  int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1x0);

D | 1x8c2s4-minmax-fp32-neon-mlal.c  (all hits in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal())
     55  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
     75  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
    108  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
    121  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);

D | 1x8c2s4-minmax-fp32-neonv8-mlal.c  (all hits in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal())
     56  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
     76  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
    109  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
    122  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);

D | 1x8c2s4-minmax-rndnu-neon-mlal.c  (all hits in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal())
     55  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
     75  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
    108  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
    121  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);

/external/XNNPACK/src/qc8-gemm/gen/

D | 2x8c2s4-minmax-fp32-neon-mlal.c  (all hits in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal())
     65  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
     96  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
     97  int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1x0);
    149  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
    169  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
    170  int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1x0);

D | 2x8c2s4-minmax-fp32-neonv8-mlal.c  (all hits in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal())
     66  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
     97  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
     98  int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1x0);
    150  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
    170  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
    171  int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1x0);

D | 1x8c2s4-minmax-fp32-neonv8-mlal.c  (all hits in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal())
     56  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
     76  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
    109  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
    122  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);

D | 1x8c2s4-minmax-fp32-neon-mlal.c  (all hits in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal())
     55  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
     75  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
    108  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
    121  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);

/external/XNNPACK/src/qc8-igemm/gen/

D | 2x8c2s4-minmax-fp32-neon-mlal.c  (all hits in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal())
     78  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
    109  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
    110  int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1x0);
    162  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
    182  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
    183  int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1x0);

D | 2x8c2s4-minmax-fp32-neonv8-mlal.c  (all hits in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal())
     79  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
    110  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
    111  int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1x0);
    163  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
    183  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
    184  int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1x0);

D | 1x8c2s4-minmax-fp32-neonv8-mlal.c  (all hits in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal())
     67  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
     87  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
    120  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
    133  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);

D | 1x8c2s4-minmax-fp32-neon-mlal.c  (all hits in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal())
     66  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
     86  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
    119  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;  [local]
    132  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);

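Every hit above follows the same inner-loop pattern: one 8-byte slice of packed weights (output columns 4..7, shift position c1) is loaded into vb4567c1x0, the packed-weight pointer w is advanced by 8 bytes, and that single vector is reused in a widening vmull_s8() against each row's activation slice (va0x0, va1x0, ...), one product line per row of the tile. The fragment below is a minimal sketch of that pattern for a two-row tile, not a copy of any listed kernel; the function name and the vpadalq_s16() accumulation step are illustrative assumptions rather than quoted lines.

/* Sketch only: shared load-once, reuse-per-row pattern behind the hits above. */
#include <arm_neon.h>

static void qs8_c2s4_accumulate_c1_sketch(
    const void** w_ptr,                            /* packed weights, advanced in place */
    int8x8_t va0x0, int8x8_t va1x0,                /* per-row activation slices */
    int32x4_t* vacc0x4567, int32x4_t* vacc1x4567)  /* per-row 32-bit accumulators */
{
  const void* w = *w_ptr;
  /* Load 8 signed 8-bit weights and advance the pointer, as in the [local] declarations. */
  const int8x8_t vb4567c1x0 = vld1_s8(w); w = (const int8_t*) w + 8;
  /* Reuse the same weight vector for every row: widening signed 8x8 -> 16-bit multiplies. */
  int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, va0x0);
  int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, va1x0);
  /* Pairwise-add the 16-bit products into the 32-bit accumulators (assumed step). */
  *vacc0x4567 = vpadalq_s16(*vacc0x4567, vprod0x4567c1);
  *vacc1x4567 = vpadalq_s16(*vacc1x4567, vprod1x4567c1);
  *w_ptr = w;
}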