/external/XNNPACK/src/qs8-gemm/gen/
D | 3x8c8-minmax-rndnu-neon-mlal.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal():
     93  const int8x8_t vb2x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    121  int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0);
    122  int16x8_t vprod1x2 = vmull_s8(vb2x0, va1x0);
    123  int16x8_t vprod2x2 = vmull_s8(vb2x0, va2x0);

D | 4x8c8-minmax-rndnu-neon-mlal.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal():
    109  const int8x8_t vb2x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    143  int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0);
    144  int16x8_t vprod1x2 = vmull_s8(vb2x0, va1x0);
    145  int16x8_t vprod2x2 = vmull_s8(vb2x0, va2x0);
    146  int16x8_t vprod3x2 = vmull_s8(vb2x0, va3x0);

D | 2x8c8-minmax-fp32-neonv8-mlal.c | in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal():
     78  const int8x8_t vb2x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    100  int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0);
    101  int16x8_t vprod1x2 = vmull_s8(vb2x0, va1x0);

D | 2x8c8-minmax-fp32-neon-mlal.c | in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal():
     77  const int8x8_t vb2x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
     99  int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0);
    100  int16x8_t vprod1x2 = vmull_s8(vb2x0, va1x0);

D | 2x8c8-minmax-rndnu-neon-mlal.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mlal():
     77  const int8x8_t vb2x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
     99  int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0);
    100  int16x8_t vprod1x2 = vmull_s8(vb2x0, va1x0);

D | 1x8c8-minmax-rndnu-neon-mlal.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal():
     61  const int8x8_t vb2x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
     77  int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0);

D | 1x8c8-minmax-fp32-neon-mlal.c | in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal():
     61  const int8x8_t vb2x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
     77  int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0);

D | 1x8c8-minmax-fp32-neonv8-mlal.c | in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal():
     62  const int8x8_t vb2x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
     78  int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0);
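Every reference above is the same inner-loop step of XNNPACK's 8-bit GEMM microkernels: load one 8-byte column of packed int8 weights into vb2x0, advance the packed-weights cursor w, then widen-multiply the column against each row's activation register (va0x0, va1x0, ...) to get int16x8_t products. A minimal standalone sketch of that step for a two-row (2x8c8) kernel, assuming an ARM target with NEON; the helper name widen_multiply_c2 is hypothetical, everything else mirrors the listing:

  #include <arm_neon.h>
  #include <stdint.h>

  // Hypothetical helper: one load/widening-multiply step for output
  // channel 2 of a 2-row kernel, as in the excerpts above.
  static void widen_multiply_c2(const void** w,
                                int8x8_t va0x0, int8x8_t va1x0,
                                int16x8_t* vprod0x2, int16x8_t* vprod1x2) {
    // Load 8 signed 8-bit weights, then step the packed-weights cursor.
    const int8x8_t vb2x0 = vld1_s8((const int8_t*) *w);
    *w = (const void*) ((uintptr_t) *w + 8 * sizeof(int8_t));

    // Widening multiply: int8x8_t * int8x8_t -> int16x8_t, one per row.
    *vprod0x2 = vmull_s8(vb2x0, va0x0);
    *vprod1x2 = vmull_s8(vb2x0, va1x0);
  }
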
/external/XNNPACK/src/qs8-igemm/gen/
D | 4x8c8-minmax-rndnu-neon-mlal.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal():
    126  const int8x8_t vb2x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    160  int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0);
    161  int16x8_t vprod1x2 = vmull_s8(vb2x0, va1x0);
    162  int16x8_t vprod2x2 = vmull_s8(vb2x0, va2x0);
    163  int16x8_t vprod3x2 = vmull_s8(vb2x0, va3x0);

D | 2x8c8-minmax-rndnu-neon-mlal.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal():
     90  const int8x8_t vb2x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    112  int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0);
    113  int16x8_t vprod1x2 = vmull_s8(vb2x0, va1x0);

D | 2x8c8-minmax-fp32-neon-mlal.c | in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal():
     90  const int8x8_t vb2x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    112  int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0);
    113  int16x8_t vprod1x2 = vmull_s8(vb2x0, va1x0);

D | 2x8c8-minmax-fp32-neonv8-mlal.c | in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal():
     91  const int8x8_t vb2x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    113  int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0);
    114  int16x8_t vprod1x2 = vmull_s8(vb2x0, va1x0);

D | 3x8c8-minmax-rndnu-neon-mlal.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal():
    108  const int8x8_t vb2x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    136  int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0);
    137  int16x8_t vprod1x2 = vmull_s8(vb2x0, va1x0);
    138  int16x8_t vprod2x2 = vmull_s8(vb2x0, va2x0);

D | 1x8c8-minmax-fp32-neonv8-mlal.c | in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal():
     73  const int8x8_t vb2x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
     89  int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0);

D | 1x8c8-minmax-rndnu-neon-mlal.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal():
     72  const int8x8_t vb2x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
     88  int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0);

D | 1x8c8-minmax-fp32-neon-mlal.c | in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal():
     72  const int8x8_t vb2x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
     88  int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0);

D | 3x16c8-minmax-rndnu-neon-mlal.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal():
    132  const int8x8_t vb2x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    168  int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0);
    169  int16x8_t vprod1x2 = vmull_s8(vb2x0, va1x0);
    170  int16x8_t vprod2x2 = vmull_s8(vb2x0, va2x0);
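The -mlal suffix shared by all of these files names the second half of the scheme: each kernel loads a second weight/activation pair (vb2x1, va0x1, ...), folds it into the same int16 product with a fused multiply-accumulate (this is what distinguishes the -mlal kernels from XNNPACK's plain -mull variants), and then pairwise-accumulates the product into int32 lanes. A hedged sketch of that follow-up step; vb2x1, va0x1, and vacc0x2 follow the generated sources' naming pattern but do not appear in the excerpts above, and accumulate_c2 is a hypothetical helper:

  #include <arm_neon.h>

  // Fold a second weight/activation pair into the int16 product, then
  // pairwise-add the eight int16 lanes into four int32 accumulator lanes.
  static int32x4_t accumulate_c2(int32x4_t vacc0x2, int16x8_t vprod0x2,
                                 int8x8_t vb2x1, int8x8_t va0x1) {
    vprod0x2 = vmlal_s8(vprod0x2, vb2x1, va0x1);  // vprod0x2 += vb2x1 * va0x1
    return vpadalq_s16(vacc0x2, vprod0x2);        // widen-and-add into int32
  }
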
/external/XNNPACK/src/qc8-gemm/gen/
D | 2x8c8-minmax-fp32-neon-mlal.c | in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal():
     77  const int8x8_t vb2x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
     99  int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0);
    100  int16x8_t vprod1x2 = vmull_s8(vb2x0, va1x0);

D | 2x8c8-minmax-fp32-neonv8-mlal.c | in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal():
     78  const int8x8_t vb2x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    100  int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0);
    101  int16x8_t vprod1x2 = vmull_s8(vb2x0, va1x0);

D | 1x8c8-minmax-fp32-neon-mlal.c | in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal():
     61  const int8x8_t vb2x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
     77  int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0);

D | 1x8c8-minmax-fp32-neonv8-mlal.c | in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal():
     62  const int8x8_t vb2x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
     78  int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0);

/external/XNNPACK/src/qc8-igemm/gen/
D | 2x8c8-minmax-fp32-neonv8-mlal.c | in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal():
     91  const int8x8_t vb2x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    113  int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0);
    114  int16x8_t vprod1x2 = vmull_s8(vb2x0, va1x0);

D | 2x8c8-minmax-fp32-neon-mlal.c | in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal():
     90  const int8x8_t vb2x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
    112  int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0);
    113  int16x8_t vprod1x2 = vmull_s8(vb2x0, va1x0);

D | 1x8c8-minmax-fp32-neon-mlal.c | in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal():
     72  const int8x8_t vb2x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
     88  int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0);

D | 1x8c8-minmax-fp32-neonv8-mlal.c | in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal():
     73  const int8x8_t vb2x0 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));  (local)
     89  int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0);
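The remaining variation in the file names is the requantization mode applied after accumulation: -rndnu- kernels rescale the int32 accumulators with fixed-point multiply/shift arithmetic (round to nearest, ties up), while -fp32- kernels convert to float, multiply by the scale, and round back; the -neonv8- files can use ARMv8's round-to-nearest convert instruction directly. A minimal sketch of the fp32 path under those assumptions; requantize_fp32 is a hypothetical helper, and a per-channel scale vector is what the qc8 kernels would supply here:

  #include <arm_neon.h>

  // fp32 requantization on an ARMv8 NEON target: scale the accumulator
  // in float, then round back to int32 with round-to-nearest.
  static int32x4_t requantize_fp32(int32x4_t vacc, float32x4_t vscale) {
    float32x4_t vfpacc = vcvtq_f32_s32(vacc);  // int32 -> float32
    vfpacc = vmulq_f32(vfpacc, vscale);        // apply requantization scale
    return vcvtnq_s32_f32(vfpacc);             // ARMv8 FCVTNS round-to-nearest
  }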