/external/XNNPACK/src/qs8-gemm/gen/

D | 3x8c8-minmax-neon-mlal-padal.c |
     95  const int8x8_t vb4x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
    141  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
    142  int16x8_t vprod1x4 = vmull_s8(vb4x0, va1x0);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
    143  int16x8_t vprod2x4 = vmull_s8(vb4x0, va2x0);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()

D | 4x8c8-minmax-neon-mlal-padal.c |
    111  const int8x8_t vb4x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
    169  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
    170  int16x8_t vprod1x4 = vmull_s8(vb4x0, va1x0);  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
    171  int16x8_t vprod2x4 = vmull_s8(vb4x0, va2x0);  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
    172  int16x8_t vprod3x4 = vmull_s8(vb4x0, va3x0);  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()

D | 2x8c8-minmax-neon-mlal-padal.c |
     79  const int8x8_t vb4x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
    113  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);  in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
    114  int16x8_t vprod1x4 = vmull_s8(vb4x0, va1x0);  in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()

D | 1x8c8-minmax-neon-mlal-padal.c |
     63  const int8x8_t vb4x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
     85  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);  in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()

D | 1x16c8-minmax-neon-mlal-padal.c |
     71  const int8x8_t vb4x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
    101  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);  in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()

D | 2x16c8-minmax-neon-mlal-padal.c |
     95  const int8x8_t vb4x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
    137  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);  in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
    138  int16x8_t vprod1x4 = vmull_s8(vb4x0, va1x0);  in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()

D | 3x16c8-minmax-neon-mlal-padal.c |
    119  const int8x8_t vb4x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
    173  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);  in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
    174  int16x8_t vprod1x4 = vmull_s8(vb4x0, va1x0);  in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
    175  int16x8_t vprod2x4 = vmull_s8(vb4x0, va2x0);  in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()

D | 4x16c8-minmax-neon-mlal-padal.c |
    143  const int8x8_t vb4x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
    209  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);  in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
    210  int16x8_t vprod1x4 = vmull_s8(vb4x0, va1x0);  in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
    211  int16x8_t vprod2x4 = vmull_s8(vb4x0, va2x0);  in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
    212  int16x8_t vprod3x4 = vmull_s8(vb4x0, va3x0);  in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
/external/XNNPACK/src/qs8-igemm/gen/

D | 4x8c8-minmax-neon-mlal-padal.c |
    128  const int8x8_t vb4x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
    186  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
    187  int16x8_t vprod1x4 = vmull_s8(vb4x0, va1x0);  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
    188  int16x8_t vprod2x4 = vmull_s8(vb4x0, va2x0);  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
    189  int16x8_t vprod3x4 = vmull_s8(vb4x0, va3x0);  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()

D | 2x8c8-minmax-neon-mlal-padal.c |
     92  const int8x8_t vb4x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
    126  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);  in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
    127  int16x8_t vprod1x4 = vmull_s8(vb4x0, va1x0);  in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()

D | 3x8c8-minmax-neon-mlal-padal.c |
    110  const int8x8_t vb4x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
    156  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
    157  int16x8_t vprod1x4 = vmull_s8(vb4x0, va1x0);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
    158  int16x8_t vprod2x4 = vmull_s8(vb4x0, va2x0);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()

D | 1x8c8-minmax-neon-mlal-padal.c |
     74  const int8x8_t vb4x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
     96  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);  in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()

D | 1x16c8-minmax-neon-mlal-padal.c |
     82  const int8x8_t vb4x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
    112  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);  in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()

D | 2x16c8-minmax-neon-mlal-padal.c |
    108  const int8x8_t vb4x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
    150  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);  in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
    151  int16x8_t vprod1x4 = vmull_s8(vb4x0, va1x0);  in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()

D | 4x16c8-minmax-neon-mlal-padal.c |
    160  const int8x8_t vb4x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
    226  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);  in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
    227  int16x8_t vprod1x4 = vmull_s8(vb4x0, va1x0);  in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
    228  int16x8_t vprod2x4 = vmull_s8(vb4x0, va2x0);  in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
    229  int16x8_t vprod3x4 = vmull_s8(vb4x0, va3x0);  in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()

D | 3x16c8-minmax-neon-mlal-padal.c |
    134  const int8x8_t vb4x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
    188  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);  in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
    189  int16x8_t vprod1x4 = vmull_s8(vb4x0, va1x0);  in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
    190  int16x8_t vprod2x4 = vmull_s8(vb4x0, va2x0);  in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
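Every hit above is an instance of the same generated inner-loop step: load 8 packed int8 weights for one output column into vb4x0 with vld1_s8, advance the weight pointer w by 8 bytes, then widen-multiply vb4x0 against each output row's activations (va0x0 .. va3x0) with vmull_s8. The sketch below condenses that step for a single output row, including the vpadalq_s16 pad-accumulate that gives the "padal" kernels their name. It is an illustrative sketch, not code from XNNPACK: the helper name mlal_padal_step and its parameter layout are assumptions made for the example; only the intrinsics and the variable names mirror the hits.

    #include <arm_neon.h>
    #include <stdint.h>

    static inline void mlal_padal_step(
        const void** w,        /* packed int8 weights; advanced in place */
        const int8x8_t va0x0,  /* 8 int8 activations for output row 0 */
        int32x4_t* vacc0x4)    /* 32-bit accumulator for output column 4 */
    {
      /* Load 8 packed weights for column 4 and advance the weight pointer,
       * exactly as on the definition lines above. */
      const int8x8_t vb4x0 = vld1_s8((const int8_t*) *w);
      *w = (const void*) ((uintptr_t) *w + 8 * sizeof(int8_t));

      /* Widening multiply: int8 x int8 -> int16 (the "mlal" stage). */
      const int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);

      /* Pairwise add-accumulate the int16 products into int32 lanes
       * (the "padal" stage). */
      *vacc0x4 = vpadalq_s16(*vacc0x4, vprod0x4);
    }

The kernels repeat this step once per output row, which is why the 1x, 2x, 3x, and 4x variants differ only in how many vprodNx4 lines reference vb4x0; the qs8-igemm variants show the identical pattern because the indirection in an igemm affects only how the activation rows are fetched, not this weight-load/multiply step.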