/external/XNNPACK/src/qs8-gemm/gen/
D | 3x8c8-minmax-rndnu-neon-mlal.c |
     97  const int8x8_t vb6x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal() local
    161  int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal()
    162  int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal()
    163  int16x8_t vprod2x6 = vmull_s8(vb6x0, va2x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal()
D | 4x8c8-minmax-rndnu-neon-mlal.c |
    113  const int8x8_t vb6x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal() local
    195  int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal()
    196  int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal()
    197  int16x8_t vprod2x6 = vmull_s8(vb6x0, va2x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal()
    198  int16x8_t vprod3x6 = vmull_s8(vb6x0, va3x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal()
D | 2x8c8-minmax-fp32-neonv8-mlal.c |
     82  const int8x8_t vb6x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal() local
    128  int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0);  in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal()
    129  int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0);  in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal()
D | 2x8c8-minmax-fp32-neon-mlal.c |
     81  const int8x8_t vb6x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal() local
    127  int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0);  in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal()
    128  int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0);  in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal()
D | 2x8c8-minmax-rndnu-neon-mlal.c |
     81  const int8x8_t vb6x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mlal() local
    127  int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mlal()
    128  int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mlal()
D | 1x8c8-minmax-rndnu-neon-mlal.c |
     65  const int8x8_t vb6x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal() local
     93  int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0);  in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal()
D | 1x8c8-minmax-fp32-neon-mlal.c |
     65  const int8x8_t vb6x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal() local
     93  int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0);  in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal()
D | 1x8c8-minmax-fp32-neonv8-mlal.c |
     66  const int8x8_t vb6x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal() local
     94  int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0);  in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal()
/external/XNNPACK/src/qs8-igemm/gen/
D | 4x8c8-minmax-rndnu-neon-mlal.c |
    130  const int8x8_t vb6x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal() local
    212  int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal()
    213  int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal()
    214  int16x8_t vprod2x6 = vmull_s8(vb6x0, va2x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal()
    215  int16x8_t vprod3x6 = vmull_s8(vb6x0, va3x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal()
D | 2x8c8-minmax-rndnu-neon-mlal.c |
     94  const int8x8_t vb6x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal() local
    140  int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal()
    141  int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal()
D | 2x8c8-minmax-fp32-neon-mlal.c |
     94  const int8x8_t vb6x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal() local
    140  int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0);  in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal()
    141  int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0);  in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal()
D | 2x8c8-minmax-fp32-neonv8-mlal.c |
     95  const int8x8_t vb6x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal() local
    141  int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0);  in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal()
    142  int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0);  in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal()
D | 3x8c8-minmax-rndnu-neon-mlal.c |
    112  const int8x8_t vb6x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal() local
    176  int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal()
    177  int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal()
    178  int16x8_t vprod2x6 = vmull_s8(vb6x0, va2x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal()
D | 1x8c8-minmax-fp32-neonv8-mlal.c |
     77  const int8x8_t vb6x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal() local
    105  int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0);  in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal()
D | 1x8c8-minmax-rndnu-neon-mlal.c |
     76  const int8x8_t vb6x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal() local
    104  int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal()
D | 1x8c8-minmax-fp32-neon-mlal.c |
     76  const int8x8_t vb6x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal() local
    104  int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0);  in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal()
D | 3x16c8-minmax-rndnu-neon-mlal.c |
    136  const int8x8_t vb6x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local
    208  int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
    209  int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
    210  int16x8_t vprod2x6 = vmull_s8(vb6x0, va2x0);  in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
/external/XNNPACK/src/qc8-gemm/gen/
D | 2x8c8-minmax-fp32-neon-mlal.c |
     81  const int8x8_t vb6x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal() local
    127  int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0);  in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal()
    128  int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0);  in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal()
D | 2x8c8-minmax-fp32-neonv8-mlal.c |
     82  const int8x8_t vb6x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal() local
    128  int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0);  in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal()
    129  int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0);  in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal()
D | 1x8c8-minmax-fp32-neon-mlal.c |
     65  const int8x8_t vb6x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal() local
     93  int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0);  in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal()
D | 1x8c8-minmax-fp32-neonv8-mlal.c |
     66  const int8x8_t vb6x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal() local
     94  int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0);  in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal()
/external/XNNPACK/src/qc8-igemm/gen/
D | 2x8c8-minmax-fp32-neonv8-mlal.c |
     95  const int8x8_t vb6x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal() local
    141  int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0);  in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal()
    142  int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0);  in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal()
D | 2x8c8-minmax-fp32-neon-mlal.c |
     94  const int8x8_t vb6x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal() local
    140  int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0);  in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal()
    141  int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0);  in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal()
D | 1x8c8-minmax-fp32-neon-mlal.c |
     76  const int8x8_t vb6x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal() local
    104  int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0);  in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal()
D | 1x8c8-minmax-fp32-neonv8-mlal.c |
     77  const int8x8_t vb6x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal() local
    105  int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0);  in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal()
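
Every hit above is the same two-step inner-loop pattern: the kernel loads one 8-byte column of packed weights with vld1_s8, then widens it against each row's 8-byte activation vector with vmull_s8 (int8 x int8 -> int16). The sketch below isolates that pattern under stated assumptions: the helper name dot8_s8 and the vpadalq_s16 accumulation step are illustrative, not code from these files.

#include <arm_neon.h>
#include <stdint.h>

/* Hypothetical helper (not an XNNPACK function): one step of the
   load-and-widen pattern in the hits above.  Eight packed int8 weights
   are loaded with vld1_s8, multiplied against eight int8 activations
   with the widening vmull_s8, and the resulting int16 products are
   pairwise-accumulated into four int32 lanes. */
static inline int32x4_t dot8_s8(const int8_t* w, int8x8_t va, int32x4_t vacc) {
  const int8x8_t vb = vld1_s8(w);            /* cf. vb6x0 = vld1_s8(w)                 */
  const int16x8_t vprod = vmull_s8(vb, va);  /* cf. vprod0x6 = vmull_s8(vb6x0, va0x0)  */
  return vpadalq_s16(vacc, vprod);           /* widen-accumulate int16 -> int32        */
}

The x0 suffixes on vb6x0 and va0x0 mark the first half of the double-pumped MLAL loop: in these generated *_mlal kernels each vprod appears to absorb a second weight/activation pair (the x1 halves) via vmlal_s8 before the int32 accumulation, which is why each product vector is declared non-const at the lines listed.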