/external/XNNPACK/src/qs8-gemm/gen/ |
D | 1x8c8-minmax-rndnu-neon-mlal.c | 93 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal() local 94 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal() 95 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal() 127 const int16x8_t vprod0x6 = vmull_s8(vb6, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal() local 128 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal()
|
D | 1x8c8-minmax-fp32-neon-mlal.c | 93 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal() local 94 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal() 95 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal() 127 const int16x8_t vprod0x6 = vmull_s8(vb6, va0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal() local 128 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal()
|
D | 1x8c8-minmax-fp32-neonv8-mlal.c | 94 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal() local 95 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal() 96 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal() 128 const int16x8_t vprod0x6 = vmull_s8(vb6, va0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal() local 129 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal()
|
D | 1x8c16-minmax-rndnu-neon-mlal.c | 85 int16x8_t vprod0x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal() local 86 vprod0x6 = vmlal_s8(vprod0x6, vget_high_s8(vb6), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal() 87 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal()
|
D | 2x8c8-minmax-fp32-neonv8-mlal.c | 128 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal() local 130 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal() 132 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal() 181 const int16x8_t vprod0x6 = vmull_s8(vb6, va0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal() local 183 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal()
|
D | 2x8c8-minmax-fp32-neon-mlal.c | 127 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal() local 129 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal() 131 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal() 180 const int16x8_t vprod0x6 = vmull_s8(vb6, va0); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal() local 182 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal()
|
D | 2x8c8-minmax-rndnu-neon-mlal.c | 127 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mlal() local 129 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mlal() 131 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mlal() 180 const int16x8_t vprod0x6 = vmull_s8(vb6, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mlal() local 182 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mlal()
|
D | 1x16c8-minmax-rndnu-neon-mlal.c | 109 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() local 110 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() 111 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() 175 const int16x8_t vprod0x6 = vmull_s8(vb6, va0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() local 176 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mlal()
|
D | 2x8c16-minmax-rndnu-neon-mlal.c | 118 int16x8_t vprod0x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal() local 120 vprod0x6 = vmlal_s8(vprod0x6, vget_high_s8(vb6), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal() 122 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal()
|
/external/XNNPACK/src/qc8-gemm/gen/ |
D | 1x8c8-minmax-fp32-neon-mlal.c | 93 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal() local 94 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal() 95 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal() 127 const int16x8_t vprod0x6 = vmull_s8(vb6, va0); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal() local 128 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal()
|
D | 1x8c8-minmax-fp32-neonv8-mlal.c | 94 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal() local 95 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal() 96 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal() 128 const int16x8_t vprod0x6 = vmull_s8(vb6, va0); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal() local 129 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal()
|
D | 2x8c8-minmax-fp32-neon-mlal.c | 127 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal() local 129 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal() 131 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal() 180 const int16x8_t vprod0x6 = vmull_s8(vb6, va0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal() local 182 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal()
|
D | 2x8c8-minmax-fp32-neonv8-mlal.c | 128 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal() local 130 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal() 132 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal() 181 const int16x8_t vprod0x6 = vmull_s8(vb6, va0); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal() local 183 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 1x8c8-minmax-fp32-neonv8-mlal.c | 105 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal() local 106 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal() 107 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal() 139 const int16x8_t vprod0x6 = vmull_s8(vb6, va0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal() local 140 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal()
|
D | 1x8c8-minmax-rndnu-neon-mlal.c | 104 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal() local 105 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal() 106 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal() 138 const int16x8_t vprod0x6 = vmull_s8(vb6, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal() local 139 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal()
|
D | 1x8c8-minmax-fp32-neon-mlal.c | 104 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal() local 105 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal() 106 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal() 138 const int16x8_t vprod0x6 = vmull_s8(vb6, va0); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal() local 139 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal()
|
D | 1x8c16-minmax-rndnu-neon-mlal.c | 96 int16x8_t vprod0x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal() local 97 vprod0x6 = vmlal_s8(vprod0x6, vget_high_s8(vb6), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal() 98 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal()
|
D | 2x8c8-minmax-rndnu-neon-mlal.c | 140 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal() local 142 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal() 144 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal() 193 const int16x8_t vprod0x6 = vmull_s8(vb6, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal() local 195 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal()
|
D | 2x8c8-minmax-fp32-neon-mlal.c | 140 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal() local 142 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal() 144 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal() 193 const int16x8_t vprod0x6 = vmull_s8(vb6, va0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal() local 195 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal()
|
D | 2x8c8-minmax-fp32-neonv8-mlal.c | 141 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal() local 143 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal() 145 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal() 194 const int16x8_t vprod0x6 = vmull_s8(vb6, va0); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal() local 196 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal()
|
D | 1x16c8-minmax-rndnu-neon-mlal.c | 120 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() local 121 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() 122 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() 186 const int16x8_t vprod0x6 = vmull_s8(vb6, va0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mlal() local 187 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mlal()
|
/external/XNNPACK/src/qc8-igemm/gen/ |
D | 1x8c8-minmax-fp32-neon-mlal.c | 104 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal() local 105 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal() 106 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal() 138 const int16x8_t vprod0x6 = vmull_s8(vb6, va0); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal() local 139 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal()
|
D | 1x8c8-minmax-fp32-neonv8-mlal.c | 105 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal() local 106 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal() 107 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal() 139 const int16x8_t vprod0x6 = vmull_s8(vb6, va0); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal() local 140 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal()
|
D | 2x8c8-minmax-fp32-neonv8-mlal.c | 141 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal() local 143 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal() 145 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal() 194 const int16x8_t vprod0x6 = vmull_s8(vb6, va0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal() local 196 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal()
|
D | 2x8c8-minmax-fp32-neon-mlal.c | 140 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal() local 142 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal() 144 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal() 193 const int16x8_t vprod0x6 = vmull_s8(vb6, va0); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal() local 195 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal()
|