/external/XNNPACK/src/qs8-gemm/gen/
D | 3x8c8-minmax-neon-mlal-padal.c
   96  const int8x8_t vb5x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
  151  int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
  152  int16x8_t vprod1x5 = vmull_s8(vb5x0, va1x0);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
  153  int16x8_t vprod2x5 = vmull_s8(vb5x0, va2x0);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D | 4x8c8-minmax-neon-mlal-padal.c
  112  const int8x8_t vb5x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
  182  int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0);  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
  183  int16x8_t vprod1x5 = vmull_s8(vb5x0, va1x0);  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
  184  int16x8_t vprod2x5 = vmull_s8(vb5x0, va2x0);  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
  185  int16x8_t vprod3x5 = vmull_s8(vb5x0, va3x0);  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
D | 2x8c8-minmax-neon-mlal-padal.c
   80  const int8x8_t vb5x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
  120  int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0);  in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
  121  int16x8_t vprod1x5 = vmull_s8(vb5x0, va1x0);  in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
D | 1x8c8-minmax-neon-mlal-padal.c
   64  const int8x8_t vb5x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
   89  int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0);  in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
D | 1x16c8-minmax-neon-mlal-padal.c
   72  const int8x8_t vb5x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
  105  int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0);  in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
D | 2x16c8-minmax-neon-mlal-padal.c
   96  const int8x8_t vb5x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
  144  int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0);  in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
  145  int16x8_t vprod1x5 = vmull_s8(vb5x0, va1x0);  in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
D | 3x16c8-minmax-neon-mlal-padal.c
  120  const int8x8_t vb5x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
  183  int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0);  in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
  184  int16x8_t vprod1x5 = vmull_s8(vb5x0, va1x0);  in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
  185  int16x8_t vprod2x5 = vmull_s8(vb5x0, va2x0);  in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
D | 4x16c8-minmax-neon-mlal-padal.c
  144  const int8x8_t vb5x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
  222  int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0);  in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
  223  int16x8_t vprod1x5 = vmull_s8(vb5x0, va1x0);  in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
  224  int16x8_t vprod2x5 = vmull_s8(vb5x0, va2x0);  in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
  225  int16x8_t vprod3x5 = vmull_s8(vb5x0, va3x0);  in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
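Every qs8-gemm kernel above uses vb5x0 the same way: eight signed 8-bit weights are loaded with vld1_s8, each output row's vprodNx5 is the widening product vmull_s8(vb5x0, vaNx0), and, per the "mlal-padal" naming, a companion x1 pair is folded in with vmlal_s8 before the 16-bit products are pairwise-accumulated into 32-bit lanes with vpadalq_s16. A minimal sketch of one such step follows; the names qs8_c8_step, acc, a_ptr, and w_ptr are illustrative, not from the XNNPACK sources, and the weight layout is simplified (the real kernels read interleaved packed weights).

    #include <arm_neon.h>
    #include <stdint.h>

    // One c8 slice for one output row: widen-multiply 8-bit weights against
    // 8-bit activations, fold in the second unrolled pair ("mlal"), then
    // pairwise-accumulate the 16-bit products into 32-bit lanes ("padal").
    static inline int32x4_t qs8_c8_step(int32x4_t acc,
                                        const int8_t* a_ptr,   // activations (va role)
                                        const int8_t* w_ptr) { // weights (vb role)
      const int8x8_t va0 = vld1_s8(a_ptr);
      const int8x8_t vb0 = vld1_s8(w_ptr);           // plays the vb5x0 role
      int16x8_t vprod = vmull_s8(vb0, va0);          // 8x8 -> 16-bit products
      const int8x8_t va1 = vld1_s8(a_ptr + 8);
      const int8x8_t vb1 = vld1_s8(w_ptr + 8);       // second K-slice (vb5x1 role)
      vprod = vmlal_s8(vprod, vb1, va1);             // the "mlal" half of the unroll
      return vpadalq_s16(acc, vprod);                // the "padal" reduction
    }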
/external/XNNPACK/src/qs8-igemm/gen/
D | 4x8c8-minmax-neon-mlal-padal.c
  129  const int8x8_t vb5x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
  199  int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0);  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
  200  int16x8_t vprod1x5 = vmull_s8(vb5x0, va1x0);  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
  201  int16x8_t vprod2x5 = vmull_s8(vb5x0, va2x0);  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
  202  int16x8_t vprod3x5 = vmull_s8(vb5x0, va3x0);  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
D | 2x8c8-minmax-neon-mlal-padal.c
   93  const int8x8_t vb5x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
  133  int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0);  in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
  134  int16x8_t vprod1x5 = vmull_s8(vb5x0, va1x0);  in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
D | 3x8c8-minmax-neon-mlal-padal.c
  111  const int8x8_t vb5x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
  166  int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
  167  int16x8_t vprod1x5 = vmull_s8(vb5x0, va1x0);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
  168  int16x8_t vprod2x5 = vmull_s8(vb5x0, va2x0);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D | 1x8c8-minmax-neon-mlal-padal.c
   75  const int8x8_t vb5x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
  100  int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0);  in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
D | 1x16c8-minmax-neon-mlal-padal.c
   83  const int8x8_t vb5x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
  116  int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0);  in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
D | 2x16c8-minmax-neon-mlal-padal.c
  109  const int8x8_t vb5x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
  157  int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0);  in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
  158  int16x8_t vprod1x5 = vmull_s8(vb5x0, va1x0);  in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
D | 4x16c8-minmax-neon-mlal-padal.c
  161  const int8x8_t vb5x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
  239  int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0);  in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
  240  int16x8_t vprod1x5 = vmull_s8(vb5x0, va1x0);  in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
  241  int16x8_t vprod2x5 = vmull_s8(vb5x0, va2x0);  in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
  242  int16x8_t vprod3x5 = vmull_s8(vb5x0, va3x0);  in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
D | 3x16c8-minmax-neon-mlal-padal.c
  135  const int8x8_t vb5x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
  198  int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0);  in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
  199  int16x8_t vprod1x5 = vmull_s8(vb5x0, va1x0);  in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
  200  int16x8_t vprod2x5 = vmull_s8(vb5x0, va2x0);  in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
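The qs8-igemm listings mirror the gemm ones at this level: the vb5x0 load and the vmull_s8 products are identical, and the igemm variants differ mainly in fetching activation rows through an indirection pointer buffer rather than from a dense matrix. In both families one weight load is amortized over every row of the M tile, which is why vprod0x5 through vprod3x5 in the 4-row kernels all consume the same vb5x0. A sketch of that reuse, again with illustrative names not taken from the sources:

    #include <arm_neon.h>
    #include <stdint.h>

    // One weight vector amortized over four output rows, mirroring the
    // 4x8c8/4x16c8 kernels: vb is loaded once per K-slice and multiplied
    // against each row's activation vector.
    static inline void qs8_c8_four_rows(int32x4_t acc[4],
                                        const int8x8_t va[4], // one vector per row
                                        const int8_t* w_ptr) {
      const int8x8_t vb = vld1_s8(w_ptr);             // the vb5x0 role: loaded once
      for (int m = 0; m < 4; m++) {
        const int16x8_t vprod = vmull_s8(vb, va[m]);  // vprod{m}x5 in the listings
        acc[m] = vpadalq_s16(acc[m], vprod);
      }
    }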