/external/XNNPACK/src/qs8-gemm/gen/

D | 1x8c8-minmax-rndnu-neon-mlal.c  (all hits in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal)
     85  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);    (local)
     86  vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1);
     87  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);
    121  const int16x8_t vprod0x4 = vmull_s8(vb4, va0);  (local)
    122  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);

D | 1x8c8-minmax-fp32-neon-mlal.c  (all hits in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal)
     85  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);    (local)
     86  vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1);
     87  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);
    121  const int16x8_t vprod0x4 = vmull_s8(vb4, va0);  (local)
    122  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);

D | 1x8c8-minmax-fp32-neonv8-mlal.c  (all hits in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal)
     86  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);    (local)
     87  vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1);
     88  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);
    122  const int16x8_t vprod0x4 = vmull_s8(vb4, va0);  (local)
    123  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);

D | 1x8c16-minmax-rndnu-neon-mlal.c  (all hits in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal)
     79  int16x8_t vprod0x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va0));   (local)
     80  vprod0x4 = vmlal_s8(vprod0x4, vget_high_s8(vb4), vget_high_s8(va0));
     81  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);

D | 2x8c8-minmax-fp32-neonv8-mlal.c  (all hits in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal)
    114  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);    (local)
    116  vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1);
    118  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);
    171  const int16x8_t vprod0x4 = vmull_s8(vb4, va0);  (local)
    173  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);

D | 2x8c8-minmax-fp32-neon-mlal.c  (all hits in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal)
    113  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);    (local)
    115  vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1);
    117  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);
    170  const int16x8_t vprod0x4 = vmull_s8(vb4, va0);  (local)
    172  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);

D | 2x8c8-minmax-rndnu-neon-mlal.c  (all hits in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mlal)
    113  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);    (local)
    115  vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1);
    117  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);
    170  const int16x8_t vprod0x4 = vmull_s8(vb4, va0);  (local)
    172  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);

D | 1x16c8-minmax-rndnu-neon-mlal.c  (all hits in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mlal)
    101  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);    (local)
    102  vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1);
    103  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);
    169  const int16x8_t vprod0x4 = vmull_s8(vb4, va0);  (local)
    170  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);

D | 2x8c16-minmax-rndnu-neon-mlal.c  (all hits in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal)
    106  int16x8_t vprod0x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va0));   (local)
    108  vprod0x4 = vmlal_s8(vprod0x4, vget_high_s8(vb4), vget_high_s8(va0));
    110  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);
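
Every c8 hit in this listing is the same widening multiply-accumulate idiom: vmull_s8 widens an 8-lane int8 product to int16x8_t, vmlal_s8 fuses the second 8-byte half into the same int16 product, and vpadalq_s16 pairwise-adds the eight int16 lanes into four int32 accumulator lanes. The sketch below distills that idiom into a standalone dot product; it is illustrative, not the generated XNNPACK code (the name dot_s8_c8_mlal and the a/b/k parameters are invented here), and like the mlal kernels it assumes the sum of the two int16 partial products never overflows int16.

#include <arm_neon.h>
#include <stddef.h>
#include <stdint.h>

/* Illustrative sketch of the c8 "mlal" idiom, not the generated kernel.
 * Accumulates the dot product of two int8 streams into the four int32
 * lanes of the returned vector. */
int32x4_t dot_s8_c8_mlal(const int8_t* a, const int8_t* b, size_t k) {
  int32x4_t vacc = vdupq_n_s32(0);
  /* Main loop: 16 bytes per operand per iteration, in x0/x1 halves. */
  for (; k >= 16; k -= 16) {
    const int8x8_t va0x0 = vld1_s8(a);
    const int8x8_t va0x1 = vld1_s8(a + 8);
    const int8x8_t vb4x0 = vld1_s8(b);
    const int8x8_t vb4x1 = vld1_s8(b + 8);
    a += 16; b += 16;
    int16x8_t vprod = vmull_s8(vb4x0, va0x0);  /* 8x8-bit widening multiply -> int16x8_t */
    vprod = vmlal_s8(vprod, vb4x1, va0x1);     /* fuse second half (assumes no int16 overflow) */
    vacc = vpadalq_s16(vacc, vprod);           /* pairwise add-accumulate into int32 lanes */
  }
  /* Remainder: a single widening multiply per 8 bytes, matching the
   * second group of hits in each file above. */
  for (; k >= 8; k -= 8) {
    const int16x8_t vprod = vmull_s8(vld1_s8(b), vld1_s8(a));
    vacc = vpadalq_s16(vacc, vprod);
    a += 8; b += 8;
  }
  return vacc;
}

Fusing two halves into one int16 product before widening to int32 halves the number of vpadalq_s16 instructions per 16 bytes, which is the point of the "mlal" kernel variants.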
/external/XNNPACK/src/qc8-gemm/gen/

D | 1x8c8-minmax-fp32-neon-mlal.c  (all hits in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal)
     85  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);    (local)
     86  vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1);
     87  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);
    121  const int16x8_t vprod0x4 = vmull_s8(vb4, va0);  (local)
    122  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);

D | 1x8c8-minmax-fp32-neonv8-mlal.c  (all hits in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal)
     86  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);    (local)
     87  vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1);
     88  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);
    122  const int16x8_t vprod0x4 = vmull_s8(vb4, va0);  (local)
    123  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);

D | 2x8c8-minmax-fp32-neon-mlal.c  (all hits in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal)
    113  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);    (local)
    115  vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1);
    117  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);
    170  const int16x8_t vprod0x4 = vmull_s8(vb4, va0);  (local)
    172  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);

D | 2x8c8-minmax-fp32-neonv8-mlal.c  (all hits in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal)
    114  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);    (local)
    116  vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1);
    118  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);
    171  const int16x8_t vprod0x4 = vmull_s8(vb4, va0);  (local)
    173  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);
/external/XNNPACK/src/qs8-igemm/gen/

D | 1x8c8-minmax-fp32-neonv8-mlal.c  (all hits in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal)
     97  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);    (local)
     98  vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1);
     99  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);
    133  const int16x8_t vprod0x4 = vmull_s8(vb4, va0);  (local)
    134  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);

D | 1x8c8-minmax-rndnu-neon-mlal.c  (all hits in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal)
     96  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);    (local)
     97  vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1);
     98  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);
    132  const int16x8_t vprod0x4 = vmull_s8(vb4, va0);  (local)
    133  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);

D | 1x8c8-minmax-fp32-neon-mlal.c  (all hits in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal)
     96  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);    (local)
     97  vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1);
     98  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);
    132  const int16x8_t vprod0x4 = vmull_s8(vb4, va0);  (local)
    133  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);

D | 1x8c16-minmax-rndnu-neon-mlal.c  (all hits in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal)
     90  int16x8_t vprod0x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va0));   (local)
     91  vprod0x4 = vmlal_s8(vprod0x4, vget_high_s8(vb4), vget_high_s8(va0));
     92  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);

D | 2x8c8-minmax-rndnu-neon-mlal.c  (all hits in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal)
    126  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);    (local)
    128  vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1);
    130  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);
    183  const int16x8_t vprod0x4 = vmull_s8(vb4, va0);  (local)
    185  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);

D | 2x8c8-minmax-fp32-neon-mlal.c  (all hits in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal)
    126  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);    (local)
    128  vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1);
    130  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);
    183  const int16x8_t vprod0x4 = vmull_s8(vb4, va0);  (local)
    185  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);

D | 2x8c8-minmax-fp32-neonv8-mlal.c  (all hits in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal)
    127  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);    (local)
    129  vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1);
    131  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);
    184  const int16x8_t vprod0x4 = vmull_s8(vb4, va0);  (local)
    186  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);

D | 1x16c8-minmax-rndnu-neon-mlal.c  (all hits in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mlal)
    112  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);    (local)
    113  vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1);
    114  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);
    180  const int16x8_t vprod0x4 = vmull_s8(vb4, va0);  (local)
    181  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);
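
The c16 hits (1x8c16, 2x8c16 above) build the same int16x8_t product from a single 16-byte load per operand, splitting the int8x16_t in registers with vget_low_s8/vget_high_s8 instead of issuing two separate 8-byte loads. A minimal sketch of just that step, with illustrative names rather than the generated kernel's surrounding loop:

#include <arm_neon.h>

/* Sketch of the c16 variant of the same idiom (illustrative, not the
 * generated kernel; same int16-overflow assumption as the c8 sketch). */
static inline int32x4_t acc_c16(int32x4_t vacc0x4, int8x16_t va0, int8x16_t vb4) {
  int16x8_t vprod0x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va0));
  vprod0x4 = vmlal_s8(vprod0x4, vget_high_s8(vb4), vget_high_s8(va0));
  return vpadalq_s16(vacc0x4, vprod0x4);
}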
/external/XNNPACK/src/qc8-igemm/gen/

D | 1x8c8-minmax-fp32-neon-mlal.c  (all hits in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal)
     96  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);    (local)
     97  vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1);
     98  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);
    132  const int16x8_t vprod0x4 = vmull_s8(vb4, va0);  (local)
    133  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);

D | 1x8c8-minmax-fp32-neonv8-mlal.c  (all hits in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal)
     97  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);    (local)
     98  vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1);
     99  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);
    133  const int16x8_t vprod0x4 = vmull_s8(vb4, va0);  (local)
    134  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);

D | 2x8c8-minmax-fp32-neonv8-mlal.c  (all hits in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal)
    127  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);    (local)
    129  vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1);
    131  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);
    184  const int16x8_t vprod0x4 = vmull_s8(vb4, va0);  (local)
    186  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);

D | 2x8c8-minmax-fp32-neon-mlal.c  (all hits in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal)
    126  int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0);    (local)
    128  vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1);
    130  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);
    183  const int16x8_t vprod0x4 = vmull_s8(vb4, va0);  (local)
    185  vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4);
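
After the reduction loop, an accumulator such as vacc0x4 still holds four int32 partial sums that must be collapsed to one value per output channel before requantization; that final step is not part of the hits above. As a generic reference (not the kernels' own epilogue, which combines the per-channel accumulators together), one portable NEON way to collapse a single accumulator:

#include <arm_neon.h>
#include <stdint.h>

/* Reduce one int32x4_t of partial sums to a single int32 total.
 * Portable NEON; on AArch64, vaddvq_s32(v) does this in one step. */
static inline int32_t hsum_s32x4(int32x4_t v) {
  int32x2_t sum2 = vadd_s32(vget_low_s32(v), vget_high_s32(v)); /* lanes {0+2, 1+3} */
  sum2 = vpadd_s32(sum2, sum2);                                 /* total in both lanes */
  return vget_lane_s32(sum2, 0);
}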