/external/XNNPACK/src/qs8-gemm/gen/ |
D | 3x8c8-minmax-neon-mlal-padal.c | 130 const int8x8_t vb3x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof( int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local 134 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() 135 vprod1x3 = vmlal_s8(vprod1x3, vb3x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() 136 vprod2x3 = vmlal_s8(vprod2x3, vb3x1, va2x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
|
D | 4x8c8-minmax-neon-mlal-padal.c | 155 const int8x8_t vb3x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof( int8_t)); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 160 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() 161 vprod1x3 = vmlal_s8(vprod1x3, vb3x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() 162 vprod2x3 = vmlal_s8(vprod2x3, vb3x1, va2x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() 163 vprod3x3 = vmlal_s8(vprod3x3, vb3x1, va3x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
|
D | 2x8c8-minmax-neon-mlal-padal.c | 105 const int8x8_t vb3x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof( int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local 108 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() 109 vprod1x3 = vmlal_s8(vprod1x3, vb3x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
|
D | 1x8c8-minmax-neon-mlal-padal.c | 80 const int8x8_t vb3x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof( int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() local 82 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
|
D | 1x16c8-minmax-neon-mlal-padal.c | 96 const int8x8_t vb3x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof( int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 98 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c | 129 const int8x8_t vb3x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof( int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 132 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 133 vprod1x3 = vmlal_s8(vprod1x3, vb3x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 162 const int8x8_t vb3x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof( int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 166 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 167 vprod1x3 = vmlal_s8(vprod1x3, vb3x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 168 vprod2x3 = vmlal_s8(vprod2x3, vb3x1, va2x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 195 const int8x8_t vb3x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof( int8_t)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 200 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 201 vprod1x3 = vmlal_s8(vprod1x3, vb3x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 202 vprod2x3 = vmlal_s8(vprod2x3, vb3x1, va2x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 203 vprod3x3 = vmlal_s8(vprod3x3, vb3x1, va3x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 4x8c8-minmax-neon-mlal-padal.c | 172 const int8x8_t vb3x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof( int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 177 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() 178 vprod1x3 = vmlal_s8(vprod1x3, vb3x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() 179 vprod2x3 = vmlal_s8(vprod2x3, vb3x1, va2x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() 180 vprod3x3 = vmlal_s8(vprod3x3, vb3x1, va3x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
|
D | 2x8c8-minmax-neon-mlal-padal.c | 118 const int8x8_t vb3x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof( int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local 121 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() 122 vprod1x3 = vmlal_s8(vprod1x3, vb3x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
|
D | 3x8c8-minmax-neon-mlal-padal.c | 145 const int8x8_t vb3x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof( int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local 149 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() 150 vprod1x3 = vmlal_s8(vprod1x3, vb3x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() 151 vprod2x3 = vmlal_s8(vprod2x3, vb3x1, va2x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
|
D | 1x8c8-minmax-neon-mlal-padal.c | 91 const int8x8_t vb3x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof( int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() local 93 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
|
D | 1x16c8-minmax-neon-mlal-padal.c | 107 const int8x8_t vb3x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof( int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 109 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c | 142 const int8x8_t vb3x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof( int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 145 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 146 vprod1x3 = vmlal_s8(vprod1x3, vb3x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 212 const int8x8_t vb3x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof( int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 217 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 218 vprod1x3 = vmlal_s8(vprod1x3, vb3x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 219 vprod2x3 = vmlal_s8(vprod2x3, vb3x1, va2x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 220 vprod3x3 = vmlal_s8(vprod3x3, vb3x1, va3x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 177 const int8x8_t vb3x1 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof( int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 181 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 182 vprod1x3 = vmlal_s8(vprod1x3, vb3x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 183 vprod2x3 = vmlal_s8(vprod2x3, vb3x1, va2x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|