/external/XNNPACK/src/qs8-gemm/gen/ |
D | 3x8c8-minmax-rndnu-neon-mlal.c | 103 int16x8_t vprod2x0 = vmull_s8(vb0x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal() local 106 vprod2x0 = vmlal_s8(vprod2x0, vb0x1, va2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal() 109 vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal() 193 const int16x8_t vprod2x0 = vmull_s8(vb0, va2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal() local 196 vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal()
|
D | 4x8c8-minmax-rndnu-neon-mlal.c | 119 int16x8_t vprod2x0 = vmull_s8(vb0x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal() local 123 vprod2x0 = vmlal_s8(vprod2x0, vb0x1, va2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal() 127 vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal() 234 const int16x8_t vprod2x0 = vmull_s8(vb0, va2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal() local 238 vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal()
|
D | 3x8c16-minmax-rndnu-neon-mlal.c | 99 int16x8_t vprod2x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal() local 102 vprod2x0 = vmlal_s8(vprod2x0, vget_high_s8(vb0), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal() 105 vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal()
|
D | 4x8c16-minmax-rndnu-neon-mlal.c | 114 int16x8_t vprod2x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c16__neon_mlal() local 118 vprod2x0 = vmlal_s8(vprod2x0, vget_high_s8(vb0), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c16__neon_mlal() 122 vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c16__neon_mlal()
|
D | 3x16c8-minmax-rndnu-neon-mlal.c | 135 int16x8_t vprod2x0 = vmull_s8(vb0x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local 138 vprod2x0 = vmlal_s8(vprod2x0, vb0x1, va2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() 141 vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() 305 const int16x8_t vprod2x0 = vmull_s8(vb0, va2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local 308 vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
|
D | 3x16c16-minmax-rndnu-neon-mlal.c | 131 int16x8_t vprod2x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local 134 vprod2x0 = vmlal_s8(vprod2x0, vget_high_s8(vb0), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() 137 vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
|
D | 3x8c8-minmax-rndnu-neon-mull.c | 92 const int16x8_t vprod2x0 = vmull_s8(vb0, va2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull() local 95 vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull()
|
D | 4x16c8-minmax-rndnu-neon-mlal.c | 159 int16x8_t vprod2x0 = vmull_s8(vb0x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local 163 vprod2x0 = vmlal_s8(vprod2x0, vb0x1, va2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() 167 vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() 378 const int16x8_t vprod2x0 = vmull_s8(vb0, va2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local 382 vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
|
D | 4x16c16-minmax-rndnu-neon-mlal.c | 154 int16x8_t vprod2x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local 158 vprod2x0 = vmlal_s8(vprod2x0, vget_high_s8(vb0), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() 162 vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
|
D | 4x8c8-minmax-rndnu-neon-mull.c | 107 const int16x8_t vprod2x0 = vmull_s8(vb0, va2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull() local 111 vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull()
|
D | 3x16c8-minmax-rndnu-neon-mull.c | 116 const int16x8_t vprod2x0 = vmull_s8(vb0, va2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local 119 vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
|
D | 4x16c8-minmax-rndnu-neon-mull.c | 139 const int16x8_t vprod2x0 = vmull_s8(vb0, va2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local 143 vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 3x8c8-minmax-rndnu-neon-mlal.c | 118 int16x8_t vprod2x0 = vmull_s8(vb0x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal() local 121 vprod2x0 = vmlal_s8(vprod2x0, vb0x1, va2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal() 124 vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal() 208 const int16x8_t vprod2x0 = vmull_s8(vb0, va2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal() local 211 vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal()
|
D | 4x8c8-minmax-rndnu-neon-mlal.c | 136 int16x8_t vprod2x0 = vmull_s8(vb0x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal() local 140 vprod2x0 = vmlal_s8(vprod2x0, vb0x1, va2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal() 144 vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal() 251 const int16x8_t vprod2x0 = vmull_s8(vb0, va2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal() local 255 vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal()
|
D | 3x8c16-minmax-rndnu-neon-mlal.c | 114 int16x8_t vprod2x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal() local 117 vprod2x0 = vmlal_s8(vprod2x0, vget_high_s8(vb0), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal() 120 vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal()
|
D | 4x8c16-minmax-rndnu-neon-mlal.c | 131 int16x8_t vprod2x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal() local 135 vprod2x0 = vmlal_s8(vprod2x0, vget_high_s8(vb0), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal() 139 vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal()
|
D | 3x16c8-minmax-rndnu-neon-mlal.c | 150 int16x8_t vprod2x0 = vmull_s8(vb0x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local 153 vprod2x0 = vmlal_s8(vprod2x0, vb0x1, va2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() 156 vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() 320 const int16x8_t vprod2x0 = vmull_s8(vb0, va2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local 323 vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
|
D | 3x16c16-minmax-rndnu-neon-mlal.c | 146 int16x8_t vprod2x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local 149 vprod2x0 = vmlal_s8(vprod2x0, vget_high_s8(vb0), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() 152 vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
|
D | 4x16c8-minmax-rndnu-neon-mlal.c | 176 int16x8_t vprod2x0 = vmull_s8(vb0x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local 180 vprod2x0 = vmlal_s8(vprod2x0, vb0x1, va2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() 184 vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() 395 const int16x8_t vprod2x0 = vmull_s8(vb0, va2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local 399 vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
|
D | 3x8c8-minmax-rndnu-neon-mull.c | 107 const int16x8_t vprod2x0 = vmull_s8(vb0, va2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull() local 110 vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull()
|
D | 4x16c16-minmax-rndnu-neon-mlal.c | 171 int16x8_t vprod2x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local 175 vprod2x0 = vmlal_s8(vprod2x0, vget_high_s8(vb0), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() 179 vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
|
D | 4x8c8-minmax-rndnu-neon-mull.c | 124 const int16x8_t vprod2x0 = vmull_s8(vb0, va2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull() local 128 vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull()
|
D | 3x16c8-minmax-rndnu-neon-mull.c | 131 const int16x8_t vprod2x0 = vmull_s8(vb0, va2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local 134 vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
|
D | 4x16c8-minmax-rndnu-neon-mull.c | 156 const int16x8_t vprod2x0 = vmull_s8(vb0, va2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local 160 vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
|