/external/XNNPACK/src/qs8-gemm/gen/ |
D | 3x8c8-minmax-neon-mlal-padal.c | 92 const int8x8_t vb1x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof( int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local 111 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() 112 int16x8_t vprod1x1 = vmull_s8(vb1x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() 113 int16x8_t vprod2x1 = vmull_s8(vb1x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
|
D | 4x8c8-minmax-neon-mlal-padal.c | 108 const int8x8_t vb1x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof( int8_t)); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 130 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() 131 int16x8_t vprod1x1 = vmull_s8(vb1x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() 132 int16x8_t vprod2x1 = vmull_s8(vb1x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() 133 int16x8_t vprod3x1 = vmull_s8(vb1x0, va3x0); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
|
D | 2x8c8-minmax-neon-mlal-padal.c | 76 const int8x8_t vb1x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof( int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local 92 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() 93 int16x8_t vprod1x1 = vmull_s8(vb1x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
|
D | 1x8c8-minmax-neon-mlal-padal.c | 60 const int8x8_t vb1x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof( int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() local 73 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
|
D | 1x16c8-minmax-neon-mlal-padal.c | 68 const int8x8_t vb1x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof( int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 89 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c | 92 const int8x8_t vb1x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof( int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 116 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 117 int16x8_t vprod1x1 = vmull_s8(vb1x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 116 const int8x8_t vb1x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof( int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 143 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 144 int16x8_t vprod1x1 = vmull_s8(vb1x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 145 int16x8_t vprod2x1 = vmull_s8(vb1x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 140 const int8x8_t vb1x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof( int8_t)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 170 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 171 int16x8_t vprod1x1 = vmull_s8(vb1x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 172 int16x8_t vprod2x1 = vmull_s8(vb1x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 173 int16x8_t vprod3x1 = vmull_s8(vb1x0, va3x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 4x8c8-minmax-neon-mlal-padal.c | 125 const int8x8_t vb1x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof( int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 147 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() 148 int16x8_t vprod1x1 = vmull_s8(vb1x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() 149 int16x8_t vprod2x1 = vmull_s8(vb1x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() 150 int16x8_t vprod3x1 = vmull_s8(vb1x0, va3x0); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
|
D | 2x8c8-minmax-neon-mlal-padal.c | 89 const int8x8_t vb1x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof( int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local 105 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() 106 int16x8_t vprod1x1 = vmull_s8(vb1x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
|
D | 3x8c8-minmax-neon-mlal-padal.c | 107 const int8x8_t vb1x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof( int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local 126 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() 127 int16x8_t vprod1x1 = vmull_s8(vb1x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() 128 int16x8_t vprod2x1 = vmull_s8(vb1x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
|
D | 1x8c8-minmax-neon-mlal-padal.c | 71 const int8x8_t vb1x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof( int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() local 84 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
|
D | 1x16c8-minmax-neon-mlal-padal.c | 79 const int8x8_t vb1x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof( int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 100 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c | 105 const int8x8_t vb1x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof( int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 129 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 130 int16x8_t vprod1x1 = vmull_s8(vb1x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 157 const int8x8_t vb1x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof( int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 187 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 188 int16x8_t vprod1x1 = vmull_s8(vb1x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 189 int16x8_t vprod2x1 = vmull_s8(vb1x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 190 int16x8_t vprod3x1 = vmull_s8(vb1x0, va3x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 131 const int8x8_t vb1x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof( int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 158 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 159 int16x8_t vprod1x1 = vmull_s8(vb1x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 160 int16x8_t vprod2x1 = vmull_s8(vb1x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|