/external/XNNPACK/src/qs8-igemm/gen/ |
D | 4x8c2s4-minmax-rndnu-neon-mlal.c | 103 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() local 176 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() 177 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() 178 int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() 179 int16x8_t vprod3x0123c2 = vmull_s8(vb0123c2x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() 249 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() local 294 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() 295 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() 296 int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() 297 int16x8_t vprod3x0123c2 = vmull_s8(vb0123c2x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal()
|
D | 3x8c2s4-minmax-rndnu-neon-mlal.c | 91 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal() local 148 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal() 149 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal() 150 int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal() 206 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal() local 241 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal() 242 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal() 243 int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal()
|
D | 2x8c2s4-minmax-fp32-neonv8-mlal.c | 80 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal() local 121 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal() 122 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal() 164 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal() local 189 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal() 190 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
|
D | 2x8c2s4-minmax-rndnu-neon-mlal.c | 79 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal() local 120 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal() 121 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal() 163 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal() local 188 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal() 189 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal()
|
D | 2x8c2s4-minmax-fp32-neon-mlal.c | 79 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal() local 120 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal() 121 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal() 163 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal() local 188 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal() 189 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
|
D | 1x8c2s4-minmax-fp32-neonv8-mlal.c | 68 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal() local 93 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal() 121 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal() local 136 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
|
D | 1x8c2s4-minmax-rndnu-neon-mlal.c | 67 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal() local 92 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal() 120 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal() local 135 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal()
|
D | 1x8c2s4-minmax-fp32-neon-mlal.c | 67 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal() local 92 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal() 120 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal() local 135 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
|
D | 4x8c2s4-minmax-rndnu-neon-mull.c | 99 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull() local 144 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull() 145 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull() 146 int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull() 147 int16x8_t vprod3x0123c2 = vmull_s8(vb0123c2x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 4x8c2s4-minmax-rndnu-neon-mlal.c | 86 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() local 159 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() 160 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() 161 int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() 162 int16x8_t vprod3x0123c2 = vmull_s8(vb0123c2x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() 232 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() local 277 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() 278 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() 279 int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal() 280 int16x8_t vprod3x0123c2 = vmull_s8(vb0123c2x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal()
|
D | 3x8c2s4-minmax-rndnu-neon-mlal.c | 76 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal() local 133 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal() 134 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal() 135 int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal() 191 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal() local 226 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal() 227 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal() 228 int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal()
|
D | 2x8c2s4-minmax-rndnu-neon-mlal.c | 66 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal() local 107 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal() 108 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal() 150 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal() local 175 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal() 176 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal()
|
D | 2x8c2s4-minmax-fp32-neonv8-mlal.c | 67 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal() local 108 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal() 109 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal() 151 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal() local 176 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal() 177 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
|
D | 2x8c2s4-minmax-fp32-neon-mlal.c | 66 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal() local 107 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal() 108 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal() 150 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal() local 175 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal() 176 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
|
D | 1x8c2s4-minmax-fp32-neon-mlal.c | 56 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal() local 81 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal() 109 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal() local 124 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
|
D | 1x8c2s4-minmax-fp32-neonv8-mlal.c | 57 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal() local 82 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal() 110 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal() local 125 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
|
D | 1x8c2s4-minmax-rndnu-neon-mlal.c | 56 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal() local 81 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal() 109 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal() local 124 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal()
|
/external/XNNPACK/src/qc8-gemm/gen/ |
D | 2x8c2s4-minmax-fp32-neon-mlal.c | 66 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal() local 107 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal() 108 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal() 150 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal() local 175 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal() 176 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
|
D | 2x8c2s4-minmax-fp32-neonv8-mlal.c | 67 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal() local 108 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal() 109 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal() 151 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal() local 176 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal() 177 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
|
D | 1x8c2s4-minmax-fp32-neonv8-mlal.c | 57 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal() local 82 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal() 110 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal() local 125 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
|
D | 1x8c2s4-minmax-fp32-neon-mlal.c | 56 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal() local 81 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal() 109 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal() local 124 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
|
/external/XNNPACK/src/qc8-igemm/gen/ |
D | 2x8c2s4-minmax-fp32-neon-mlal.c | 79 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal() local 120 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal() 121 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal() 163 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal() local 188 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal() 189 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal()
|
D | 2x8c2s4-minmax-fp32-neonv8-mlal.c | 80 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal() local 121 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal() 122 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal() 164 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal() local 189 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal() 190 int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, va1x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal()
|
D | 1x8c2s4-minmax-fp32-neonv8-mlal.c | 68 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal() local 93 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal() 121 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal() local 136 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal()
|
D | 1x8c2s4-minmax-fp32-neon-mlal.c | 67 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal() local 92 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal() 120 const int8x8_t vb0123c2x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal() local 135 int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal()
|