/external/XNNPACK/src/qs8-gemm/gen/ |
D | 4x8c4s2-minmax-rndnu-neon-mlal.c | 91 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local 112 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() 113 int16x8_t vprod1x23c0 = vmull_s8(vb23c0x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() 114 int16x8_t vprod2x23c0 = vmull_s8(vb23c0x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() 115 int16x8_t vprod3x23c0 = vmull_s8(vb23c0x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() 221 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local 237 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() 238 int16x8_t vprod1x23c0 = vmull_s8(vb23c0x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() 239 int16x8_t vprod2x23c0 = vmull_s8(vb23c0x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() 240 int16x8_t vprod3x23c0 = vmull_s8(vb23c0x0, va3x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
|
D | 3x8c4s2-minmax-rndnu-neon-mlal.c | 79 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal() local 97 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal() 98 int16x8_t vprod1x23c0 = vmull_s8(vb23c0x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal() 99 int16x8_t vprod2x23c0 = vmull_s8(vb23c0x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal() 182 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal() local 196 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal() 197 int16x8_t vprod1x23c0 = vmull_s8(vb23c0x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal() 198 int16x8_t vprod2x23c0 = vmull_s8(vb23c0x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal()
|
D | 2x8c4s2-minmax-rndnu-neon-mlal.c | 67 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal() local 82 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal() 83 int16x8_t vprod1x23c0 = vmull_s8(vb23c0x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal() 143 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal() local 155 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal() 156 int16x8_t vprod1x23c0 = vmull_s8(vb23c0x0, va1x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal()
|
D | 2x8c4s2-minmax-fp32-neonv8-mlal.c | 68 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal() local 83 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal() 84 int16x8_t vprod1x23c0 = vmull_s8(vb23c0x0, va1x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal() 144 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal() local 156 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal() 157 int16x8_t vprod1x23c0 = vmull_s8(vb23c0x0, va1x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal()
|
D | 2x8c4s2-minmax-fp32-neon-mlal.c | 67 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal() local 82 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal() 83 int16x8_t vprod1x23c0 = vmull_s8(vb23c0x0, va1x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal() 143 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal() local 155 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal() 156 int16x8_t vprod1x23c0 = vmull_s8(vb23c0x0, va1x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal()
|
D | 1x8c4s2-minmax-fp32-neon-mlal.c | 55 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal() local 67 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal() 104 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal() local 114 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal()
|
D | 1x8c4s2-minmax-fp32-neonv8-mlal.c | 56 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal() local 68 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal() 105 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal() local 115 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal()
|
D | 1x8c4s2-minmax-rndnu-neon-mlal.c | 55 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mlal() local 67 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mlal() 104 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mlal() local 114 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mlal()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 4x8c4s2-minmax-rndnu-neon-mlal.c | 108 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local 129 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() 130 int16x8_t vprod1x23c0 = vmull_s8(vb23c0x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() 131 int16x8_t vprod2x23c0 = vmull_s8(vb23c0x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() 132 int16x8_t vprod3x23c0 = vmull_s8(vb23c0x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() 238 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() local 254 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() 255 int16x8_t vprod1x23c0 = vmull_s8(vb23c0x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() 256 int16x8_t vprod2x23c0 = vmull_s8(vb23c0x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal() 257 int16x8_t vprod3x23c0 = vmull_s8(vb23c0x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal()
|
D | 2x8c4s2-minmax-fp32-neonv8-mlal.c | 81 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal() local 96 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal() 97 int16x8_t vprod1x23c0 = vmull_s8(vb23c0x0, va1x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal() 157 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal() local 169 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal() 170 int16x8_t vprod1x23c0 = vmull_s8(vb23c0x0, va1x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal()
|
D | 3x8c4s2-minmax-rndnu-neon-mlal.c | 94 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal() local 112 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal() 113 int16x8_t vprod1x23c0 = vmull_s8(vb23c0x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal() 114 int16x8_t vprod2x23c0 = vmull_s8(vb23c0x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal() 197 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal() local 211 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal() 212 int16x8_t vprod1x23c0 = vmull_s8(vb23c0x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal() 213 int16x8_t vprod2x23c0 = vmull_s8(vb23c0x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal()
|
D | 2x8c4s2-minmax-fp32-neon-mlal.c | 80 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal() local 95 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal() 96 int16x8_t vprod1x23c0 = vmull_s8(vb23c0x0, va1x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal() 156 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal() local 168 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal() 169 int16x8_t vprod1x23c0 = vmull_s8(vb23c0x0, va1x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal()
|
D | 2x8c4s2-minmax-rndnu-neon-mlal.c | 80 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal() local 95 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal() 96 int16x8_t vprod1x23c0 = vmull_s8(vb23c0x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal() 156 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal() local 168 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal() 169 int16x8_t vprod1x23c0 = vmull_s8(vb23c0x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal()
|
D | 1x8c4s2-minmax-fp32-neon-mlal.c | 66 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal() local 78 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal() 115 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal() local 125 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal()
|
D | 1x8c4s2-minmax-fp32-neonv8-mlal.c | 67 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal() local 79 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal() 116 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal() local 126 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal()
|
D | 1x8c4s2-minmax-rndnu-neon-mlal.c | 66 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mlal() local 78 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mlal() 115 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mlal() local 125 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mlal()
|
D | 4x8c4s2-minmax-rndnu-neon-mull.c | 104 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull() local 120 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull() 121 int16x8_t vprod1x23c0 = vmull_s8(vb23c0x0, va1x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull() 122 int16x8_t vprod2x23c0 = vmull_s8(vb23c0x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull() 123 int16x8_t vprod3x23c0 = vmull_s8(vb23c0x0, va3x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull()
|
/external/XNNPACK/src/qc8-gemm/gen/ |
D | 2x8c4s2-minmax-fp32-neonv8-mlal.c | 68 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal() local 83 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal() 84 int16x8_t vprod1x23c0 = vmull_s8(vb23c0x0, va1x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal() 144 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal() local 156 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal() 157 int16x8_t vprod1x23c0 = vmull_s8(vb23c0x0, va1x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal()
|
D | 2x8c4s2-minmax-fp32-neon-mlal.c | 67 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal() local 82 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal() 83 int16x8_t vprod1x23c0 = vmull_s8(vb23c0x0, va1x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal() 143 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal() local 155 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal() 156 int16x8_t vprod1x23c0 = vmull_s8(vb23c0x0, va1x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal()
|
D | 1x8c4s2-minmax-fp32-neonv8-mlal.c | 56 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal() local 68 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal() 105 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal() local 115 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal()
|
D | 1x8c4s2-minmax-fp32-neon-mlal.c | 55 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal() local 67 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal() 104 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal() local 114 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal()
|
/external/XNNPACK/src/qc8-igemm/gen/ |
D | 2x8c4s2-minmax-fp32-neonv8-mlal.c | 81 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal() local 96 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal() 97 int16x8_t vprod1x23c0 = vmull_s8(vb23c0x0, va1x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal() 157 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal() local 169 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal() 170 int16x8_t vprod1x23c0 = vmull_s8(vb23c0x0, va1x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal()
|
D | 2x8c4s2-minmax-fp32-neon-mlal.c | 80 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal() local 95 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal() 96 int16x8_t vprod1x23c0 = vmull_s8(vb23c0x0, va1x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal() 156 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal() local 168 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal() 169 int16x8_t vprod1x23c0 = vmull_s8(vb23c0x0, va1x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal()
|
D | 1x8c4s2-minmax-fp32-neonv8-mlal.c | 67 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal() local 79 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal() 116 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal() local 126 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal()
|
D | 1x8c4s2-minmax-fp32-neon-mlal.c | 66 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal() local 78 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal() 115 const int8x8_t vb23c0x0 = vld1_s8(w); w = (const int8_t*) w + 8; in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal() local 125 int16x8_t vprod0x23c0 = vmull_s8(vb23c0x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal()
|