/external/XNNPACK/src/qs8-gemm/gen/ |
D | 1x8c8-minmax-neon-mlal-padal.c | 89 int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() local 90 vprod0x5 = vmlal_s8(vprod0x5, vb5x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() 91 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() 124 const int16x8_t vprod0x5 = vmull_s8(vb5, va0); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() local 125 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
|
D | 1x8c16-minmax-neon-mlal-padal.c | 82 int16x8_t vprod0x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal() local 83 vprod0x5 = vmlal_s8(vprod0x5, vget_high_s8(vb5), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal() 84 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
|
D | 2x8c8-minmax-neon-mlal-padal.c | 120 int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local 122 vprod0x5 = vmlal_s8(vprod0x5, vb5x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() 124 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() 175 const int16x8_t vprod0x5 = vmull_s8(vb5, va0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local 177 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
|
D | 1x16c8-minmax-neon-mlal-padal.c | 105 int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 106 vprod0x5 = vmlal_s8(vprod0x5, vb5x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() 107 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() 172 const int16x8_t vprod0x5 = vmull_s8(vb5, va0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 173 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
|
D | 3x8c8-minmax-neon-mlal-padal.c | 151 int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local 154 vprod0x5 = vmlal_s8(vprod0x5, vb5x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() 157 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() 226 const int16x8_t vprod0x5 = vmull_s8(vb5, va0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local 229 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
|
D | 2x8c16-minmax-neon-mlal-padal.c | 112 int16x8_t vprod0x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() local 114 vprod0x5 = vmlal_s8(vprod0x5, vget_high_s8(vb5), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() 116 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
|
D | 1x16c16-minmax-neon-mlal-padal.c | 98 int16x8_t vprod0x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() local 99 vprod0x5 = vmlal_s8(vprod0x5, vget_high_s8(vb5), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() 100 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
|
D | 1x8c8-minmax-neon-mull-padal.c | 75 const int16x8_t vprod0x5 = vmull_s8(vb5, va0); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal() local 76 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal()
|
D | 3x8c16-minmax-neon-mlal-padal.c | 142 int16x8_t vprod0x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local 145 vprod0x5 = vmlal_s8(vprod0x5, vget_high_s8(vb5), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() 148 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
|
D | 4x8c8-minmax-neon-mlal-padal.c | 182 int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 186 vprod0x5 = vmlal_s8(vprod0x5, vb5x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() 190 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() 277 const int16x8_t vprod0x5 = vmull_s8(vb5, va0); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 281 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c | 144 int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 146 vprod0x5 = vmlal_s8(vprod0x5, vb5x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 148 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 255 const int16x8_t vprod0x5 = vmull_s8(vb5, va0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 257 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 136 int16x8_t vprod0x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 138 vprod0x5 = vmlal_s8(vprod0x5, vget_high_s8(vb5), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 140 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 4x8c16-minmax-neon-mlal-padal.c | 172 int16x8_t vprod0x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local 176 vprod0x5 = vmlal_s8(vprod0x5, vget_high_s8(vb5), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() 180 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 1x8c8-minmax-neon-mlal-padal.c | 100 int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() local 101 vprod0x5 = vmlal_s8(vprod0x5, vb5x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() 102 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() 135 const int16x8_t vprod0x5 = vmull_s8(vb5, va0); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() local 136 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
|
D | 1x8c16-minmax-neon-mlal-padal.c | 93 int16x8_t vprod0x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal() local 94 vprod0x5 = vmlal_s8(vprod0x5, vget_high_s8(vb5), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal() 95 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
|
D | 2x8c8-minmax-neon-mlal-padal.c | 133 int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local 135 vprod0x5 = vmlal_s8(vprod0x5, vb5x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() 137 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() 188 const int16x8_t vprod0x5 = vmull_s8(vb5, va0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local 190 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
|
D | 1x16c8-minmax-neon-mlal-padal.c | 116 int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 117 vprod0x5 = vmlal_s8(vprod0x5, vb5x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() 118 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() 183 const int16x8_t vprod0x5 = vmull_s8(vb5, va0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 184 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
|
D | 2x8c16-minmax-neon-mlal-padal.c | 125 int16x8_t vprod0x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() local 127 vprod0x5 = vmlal_s8(vprod0x5, vget_high_s8(vb5), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() 129 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
|
D | 1x16c16-minmax-neon-mlal-padal.c | 109 int16x8_t vprod0x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() local 110 vprod0x5 = vmlal_s8(vprod0x5, vget_high_s8(vb5), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() 111 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
|
D | 3x8c8-minmax-neon-mlal-padal.c | 166 int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local 169 vprod0x5 = vmlal_s8(vprod0x5, vb5x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() 172 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() 241 const int16x8_t vprod0x5 = vmull_s8(vb5, va0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local 244 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
|
D | 1x8c8-minmax-neon-mull-padal.c | 86 const int16x8_t vprod0x5 = vmull_s8(vb5, va0); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal() local 87 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal()
|
D | 3x8c16-minmax-neon-mlal-padal.c | 157 int16x8_t vprod0x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local 160 vprod0x5 = vmlal_s8(vprod0x5, vget_high_s8(vb5), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() 163 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c | 157 int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 159 vprod0x5 = vmlal_s8(vprod0x5, vb5x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 161 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 268 const int16x8_t vprod0x5 = vmull_s8(vb5, va0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 270 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 4x8c8-minmax-neon-mlal-padal.c | 199 int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 203 vprod0x5 = vmlal_s8(vprod0x5, vb5x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() 207 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() 294 const int16x8_t vprod0x5 = vmull_s8(vb5, va0); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 298 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
|
D | 4x8c16-minmax-neon-mlal-padal.c | 189 int16x8_t vprod0x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local 193 vprod0x5 = vmlal_s8(vprod0x5, vget_high_s8(vb5), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() 197 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
|