/external/XNNPACK/src/qs8-gemm/gen/ |
D | 1x8c8-minmax-neon-mlal-padal.c | 85 int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() local 86 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() 87 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() 121 const int16x8_t vprod0x4 = vmull_s8(vb4, va0); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() local 122 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
|
D | 1x8c16-minmax-neon-mlal-padal.c | 79 int16x8_t vprod0x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal() local 80 vprod0x4 = vmlal_s8(vprod0x4, vget_high_s8(vb4), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal() 81 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
|
D | 2x8c8-minmax-neon-mlal-padal.c | 113 int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local 115 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() 117 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() 170 const int16x8_t vprod0x4 = vmull_s8(vb4, va0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local 172 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
|
D | 1x16c8-minmax-neon-mlal-padal.c | 101 int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 102 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() 103 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() 169 const int16x8_t vprod0x4 = vmull_s8(vb4, va0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 170 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
|
D | 3x8c8-minmax-neon-mlal-padal.c | 141 int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local 144 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() 147 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() 219 const int16x8_t vprod0x4 = vmull_s8(vb4, va0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local 222 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
|
D | 2x8c16-minmax-neon-mlal-padal.c | 106 int16x8_t vprod0x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() local 108 vprod0x4 = vmlal_s8(vprod0x4, vget_high_s8(vb4), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() 110 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
|
D | 1x16c16-minmax-neon-mlal-padal.c | 95 int16x8_t vprod0x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() local 96 vprod0x4 = vmlal_s8(vprod0x4, vget_high_s8(vb4), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() 97 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
|
D | 1x8c8-minmax-neon-mull-padal.c | 72 const int16x8_t vprod0x4 = vmull_s8(vb4, va0); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal() local 73 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal()
|
D | 3x8c16-minmax-neon-mlal-padal.c | 133 int16x8_t vprod0x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local 136 vprod0x4 = vmlal_s8(vprod0x4, vget_high_s8(vb4), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() 139 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
|
D | 4x8c8-minmax-neon-mlal-padal.c | 169 int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 173 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() 177 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() 268 const int16x8_t vprod0x4 = vmull_s8(vb4, va0); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 272 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c | 137 int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 139 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 141 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() 250 const int16x8_t vprod0x4 = vmull_s8(vb4, va0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 252 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 130 int16x8_t vprod0x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local 132 vprod0x4 = vmlal_s8(vprod0x4, vget_high_s8(vb4), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 134 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 4x8c16-minmax-neon-mlal-padal.c | 160 int16x8_t vprod0x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local 164 vprod0x4 = vmlal_s8(vprod0x4, vget_high_s8(vb4), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() 168 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 1x8c8-minmax-neon-mlal-padal.c | 96 int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() local 97 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() 98 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() 132 const int16x8_t vprod0x4 = vmull_s8(vb4, va0); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() local 133 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
|
D | 1x8c16-minmax-neon-mlal-padal.c | 90 int16x8_t vprod0x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal() local 91 vprod0x4 = vmlal_s8(vprod0x4, vget_high_s8(vb4), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal() 92 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
|
D | 2x8c8-minmax-neon-mlal-padal.c | 126 int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local 128 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() 130 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() 183 const int16x8_t vprod0x4 = vmull_s8(vb4, va0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local 185 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
|
D | 1x16c8-minmax-neon-mlal-padal.c | 112 int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 113 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() 114 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() 180 const int16x8_t vprod0x4 = vmull_s8(vb4, va0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local 181 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
|
D | 2x8c16-minmax-neon-mlal-padal.c | 119 int16x8_t vprod0x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() local 121 vprod0x4 = vmlal_s8(vprod0x4, vget_high_s8(vb4), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() 123 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
|
D | 1x16c16-minmax-neon-mlal-padal.c | 106 int16x8_t vprod0x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() local 107 vprod0x4 = vmlal_s8(vprod0x4, vget_high_s8(vb4), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() 108 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
|
D | 3x8c8-minmax-neon-mlal-padal.c | 156 int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local 159 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() 162 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() 234 const int16x8_t vprod0x4 = vmull_s8(vb4, va0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local 237 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
|
D | 1x8c8-minmax-neon-mull-padal.c | 83 const int16x8_t vprod0x4 = vmull_s8(vb4, va0); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal() local 84 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal()
|
D | 3x8c16-minmax-neon-mlal-padal.c | 148 int16x8_t vprod0x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local 151 vprod0x4 = vmlal_s8(vprod0x4, vget_high_s8(vb4), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() 154 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c | 150 int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 152 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 154 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() 263 const int16x8_t vprod0x4 = vmull_s8(vb4, va0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local 265 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 4x8c8-minmax-neon-mlal-padal.c | 186 int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 190 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() 194 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() 285 const int16x8_t vprod0x4 = vmull_s8(vb4, va0); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local 289 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
|
D | 4x8c16-minmax-neon-mlal-padal.c | 177 int16x8_t vprod0x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local 181 vprod0x4 = vmlal_s8(vprod0x4, vget_high_s8(vb4), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() 185 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
|