/external/XNNPACK/src/qs8-gemm/gen/ |
D | 1x8c8-minmax-rndnu-neon-mlal.c | 73 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal() local 74 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal() 75 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal() 112 const int16x8_t vprod0x1 = vmull_s8(vb1, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal() local 113 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal()
|
D | 1x8c8-minmax-fp32-neon-mlal.c | 73 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal() local 74 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal() 75 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal() 112 const int16x8_t vprod0x1 = vmull_s8(vb1, va0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal() local 113 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal()
|
D | 1x8c8-minmax-fp32-neonv8-mlal.c | 74 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal() local 75 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal() 76 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal() 113 const int16x8_t vprod0x1 = vmull_s8(vb1, va0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal() local 114 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal()
|
D | 1x8c16-minmax-rndnu-neon-mlal.c | 70 int16x8_t vprod0x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal() local 71 vprod0x1 = vmlal_s8(vprod0x1, vget_high_s8(vb1), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal() 72 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal()
|
D | 2x8c8-minmax-fp32-neonv8-mlal.c | 93 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal() local 95 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal() 97 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal() 156 const int16x8_t vprod0x1 = vmull_s8(vb1, va0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal() local 158 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal()
|
D | 2x8c8-minmax-fp32-neon-mlal.c | 92 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal() local 94 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal() 96 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal() 155 const int16x8_t vprod0x1 = vmull_s8(vb1, va0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal() local 157 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal()
|
D | 2x8c8-minmax-rndnu-neon-mlal.c | 92 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mlal() local 94 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mlal() 96 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mlal() 155 const int16x8_t vprod0x1 = vmull_s8(vb1, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mlal() local 157 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mlal()
|
D | 1x16c8-minmax-rndnu-neon-mlal.c | 89 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() local 90 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() 91 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() 160 const int16x8_t vprod0x1 = vmull_s8(vb1, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() local 161 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mlal()
|
D | 2x8c16-minmax-rndnu-neon-mlal.c | 88 int16x8_t vprod0x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal() local 90 vprod0x1 = vmlal_s8(vprod0x1, vget_high_s8(vb1), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal() 92 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal()
|
/external/XNNPACK/src/qc8-gemm/gen/ |
D | 1x8c8-minmax-fp32-neon-mlal.c | 73 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal() local 74 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal() 75 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal() 112 const int16x8_t vprod0x1 = vmull_s8(vb1, va0); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal() local 113 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal()
|
D | 1x8c8-minmax-fp32-neonv8-mlal.c | 74 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal() local 75 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal() 76 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal() 113 const int16x8_t vprod0x1 = vmull_s8(vb1, va0); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal() local 114 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal()
|
D | 2x8c8-minmax-fp32-neon-mlal.c | 92 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal() local 94 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal() 96 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal() 155 const int16x8_t vprod0x1 = vmull_s8(vb1, va0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal() local 157 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal()
|
D | 2x8c8-minmax-fp32-neonv8-mlal.c | 93 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal() local 95 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal() 97 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal() 156 const int16x8_t vprod0x1 = vmull_s8(vb1, va0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal() local 158 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 1x8c8-minmax-fp32-neonv8-mlal.c | 85 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal() local 86 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal() 87 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal() 124 const int16x8_t vprod0x1 = vmull_s8(vb1, va0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal() local 125 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal()
|
D | 1x8c8-minmax-rndnu-neon-mlal.c | 84 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal() local 85 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal() 86 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal() 123 const int16x8_t vprod0x1 = vmull_s8(vb1, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal() local 124 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal()
|
D | 1x8c8-minmax-fp32-neon-mlal.c | 84 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal() local 85 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal() 86 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal() 123 const int16x8_t vprod0x1 = vmull_s8(vb1, va0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal() local 124 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal()
|
D | 1x8c16-minmax-rndnu-neon-mlal.c | 81 int16x8_t vprod0x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal() local 82 vprod0x1 = vmlal_s8(vprod0x1, vget_high_s8(vb1), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal() 83 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal()
|
D | 2x8c8-minmax-rndnu-neon-mlal.c | 105 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal() local 107 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal() 109 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal() 168 const int16x8_t vprod0x1 = vmull_s8(vb1, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal() local 170 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal()
|
D | 2x8c8-minmax-fp32-neon-mlal.c | 105 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal() local 107 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal() 109 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal() 168 const int16x8_t vprod0x1 = vmull_s8(vb1, va0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal() local 170 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal()
|
D | 2x8c8-minmax-fp32-neonv8-mlal.c | 106 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal() local 108 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal() 110 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal() 169 const int16x8_t vprod0x1 = vmull_s8(vb1, va0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal() local 171 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal()
|
D | 1x16c8-minmax-rndnu-neon-mlal.c | 100 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() local 101 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() 102 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() 171 const int16x8_t vprod0x1 = vmull_s8(vb1, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() local 172 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mlal()
|
/external/XNNPACK/src/qc8-igemm/gen/ |
D | 1x8c8-minmax-fp32-neon-mlal.c | 84 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal() local 85 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal() 86 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal() 123 const int16x8_t vprod0x1 = vmull_s8(vb1, va0); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal() local 124 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal()
|
D | 1x8c8-minmax-fp32-neonv8-mlal.c | 85 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal() local 86 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal() 87 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal() 124 const int16x8_t vprod0x1 = vmull_s8(vb1, va0); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal() local 125 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal()
|
D | 2x8c8-minmax-fp32-neonv8-mlal.c | 106 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal() local 108 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal() 110 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal() 169 const int16x8_t vprod0x1 = vmull_s8(vb1, va0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal() local 171 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal()
|
D | 2x8c8-minmax-fp32-neon-mlal.c | 105 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal() local 107 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal() 109 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal() 168 const int16x8_t vprod0x1 = vmull_s8(vb1, va0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal() local 170 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal()
|