/external/XNNPACK/src/qs8-gemm/gen/ |
D | 1x8c8-minmax-neon-mlal-padal.c | 97 int16x8_t vprod0x7 = vmull_s8(vb7x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() local 98 vprod0x7 = vmlal_s8(vprod0x7, vb7x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() 99 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() 130 const int16x8_t vprod0x7 = vmull_s8(vb7, va0); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() local 131 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
|
D | 1x8c16-minmax-neon-mlal-padal.c | 88 int16x8_t vprod0x7 = vmull_s8(vget_low_s8(vb7), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal() local 89 vprod0x7 = vmlal_s8(vprod0x7, vget_high_s8(vb7), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal() 90 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
|
D | 2x8c8-minmax-neon-mlal-padal.c | 134 int16x8_t vprod0x7 = vmull_s8(vb7x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local 136 vprod0x7 = vmlal_s8(vprod0x7, vb7x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() 138 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() 185 const int16x8_t vprod0x7 = vmull_s8(vb7, va0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local 187 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
|
D | 1x16c8-minmax-neon-mlal-padal.c | 113 int16x8_t vprod0x7 = vmull_s8(vb7x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 114 vprod0x7 = vmlal_s8(vprod0x7, vb7x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() 115 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() 178 const int16x8_t vprod0x7 = vmull_s8(vb7, va0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 179 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
|
D | 3x8c8-minmax-neon-mlal-padal.c | 171 int16x8_t vprod0x7 = vmull_s8(vb7x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local 174 vprod0x7 = vmlal_s8(vprod0x7, vb7x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() 177 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() 240 const int16x8_t vprod0x7 = vmull_s8(vb7, va0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local 243 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
|
D | 2x8c16-minmax-neon-mlal-padal.c | 124 int16x8_t vprod0x7 = vmull_s8(vget_low_s8(vb7), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() local 126 vprod0x7 = vmlal_s8(vprod0x7, vget_high_s8(vb7), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() 128 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
|
D | 1x16c16-minmax-neon-mlal-padal.c | 104 int16x8_t vprod0x7 = vmull_s8(vget_low_s8(vb7), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() local 105 vprod0x7 = vmlal_s8(vprod0x7, vget_high_s8(vb7), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() 106 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
|
D | 1x8c8-minmax-neon-mull-padal.c | 81 const int16x8_t vprod0x7 = vmull_s8(vb7, va0); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal() local 82 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal()
|
D | 3x8c16-minmax-neon-mlal-padal.c | 160 int16x8_t vprod0x7 = vmull_s8(vget_low_s8(vb7), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local 163 vprod0x7 = vmlal_s8(vprod0x7, vget_high_s8(vb7), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() 166 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
|
D | 4x8c8-minmax-neon-mlal-padal.c | 208 int16x8_t vprod0x7 = vmull_s8(vb7x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 212 vprod0x7 = vmlal_s8(vprod0x7, vb7x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() 216 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() 295 const int16x8_t vprod0x7 = vmull_s8(vb7, va0); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 299 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c | 158 int16x8_t vprod0x7 = vmull_s8(vb7x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 160 vprod0x7 = vmlal_s8(vprod0x7, vb7x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 162 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 265 const int16x8_t vprod0x7 = vmull_s8(vb7, va0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 267 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 148 int16x8_t vprod0x7 = vmull_s8(vget_low_s8(vb7), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 150 vprod0x7 = vmlal_s8(vprod0x7, vget_high_s8(vb7), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 152 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 4x8c16-minmax-neon-mlal-padal.c | 196 int16x8_t vprod0x7 = vmull_s8(vget_low_s8(vb7), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local 200 vprod0x7 = vmlal_s8(vprod0x7, vget_high_s8(vb7), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() 204 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 1x8c8-minmax-neon-mlal-padal.c | 108 int16x8_t vprod0x7 = vmull_s8(vb7x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() local 109 vprod0x7 = vmlal_s8(vprod0x7, vb7x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() 110 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() 141 const int16x8_t vprod0x7 = vmull_s8(vb7, va0); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() local 142 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
|
D | 1x8c16-minmax-neon-mlal-padal.c | 99 int16x8_t vprod0x7 = vmull_s8(vget_low_s8(vb7), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal() local 100 vprod0x7 = vmlal_s8(vprod0x7, vget_high_s8(vb7), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal() 101 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
|
D | 2x8c8-minmax-neon-mlal-padal.c | 147 int16x8_t vprod0x7 = vmull_s8(vb7x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local 149 vprod0x7 = vmlal_s8(vprod0x7, vb7x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() 151 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() 198 const int16x8_t vprod0x7 = vmull_s8(vb7, va0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local 200 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
|
D | 1x16c8-minmax-neon-mlal-padal.c | 124 int16x8_t vprod0x7 = vmull_s8(vb7x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 125 vprod0x7 = vmlal_s8(vprod0x7, vb7x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() 126 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() 189 const int16x8_t vprod0x7 = vmull_s8(vb7, va0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 190 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
|
D | 2x8c16-minmax-neon-mlal-padal.c | 137 int16x8_t vprod0x7 = vmull_s8(vget_low_s8(vb7), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() local 139 vprod0x7 = vmlal_s8(vprod0x7, vget_high_s8(vb7), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() 141 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
|
D | 1x16c16-minmax-neon-mlal-padal.c | 115 int16x8_t vprod0x7 = vmull_s8(vget_low_s8(vb7), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() local 116 vprod0x7 = vmlal_s8(vprod0x7, vget_high_s8(vb7), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() 117 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
|
D | 3x8c8-minmax-neon-mlal-padal.c | 186 int16x8_t vprod0x7 = vmull_s8(vb7x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local 189 vprod0x7 = vmlal_s8(vprod0x7, vb7x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() 192 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() 255 const int16x8_t vprod0x7 = vmull_s8(vb7, va0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local 258 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
|
D | 1x8c8-minmax-neon-mull-padal.c | 92 const int16x8_t vprod0x7 = vmull_s8(vb7, va0); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal() local 93 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal()
|
D | 3x8c16-minmax-neon-mlal-padal.c | 175 int16x8_t vprod0x7 = vmull_s8(vget_low_s8(vb7), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local 178 vprod0x7 = vmlal_s8(vprod0x7, vget_high_s8(vb7), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() 181 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c | 171 int16x8_t vprod0x7 = vmull_s8(vb7x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 173 vprod0x7 = vmlal_s8(vprod0x7, vb7x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 175 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 278 const int16x8_t vprod0x7 = vmull_s8(vb7, va0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 280 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 4x8c8-minmax-neon-mlal-padal.c | 225 int16x8_t vprod0x7 = vmull_s8(vb7x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 229 vprod0x7 = vmlal_s8(vprod0x7, vb7x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() 233 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() 312 const int16x8_t vprod0x7 = vmull_s8(vb7, va0); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 316 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
|
D | 4x8c16-minmax-neon-mlal-padal.c | 213 int16x8_t vprod0x7 = vmull_s8(vget_low_s8(vb7), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local 217 vprod0x7 = vmlal_s8(vprod0x7, vget_high_s8(vb7), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() 221 vacc0x7 = vpadalq_s16(vacc0x7, vprod0x7); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
|