/external/XNNPACK/src/qs8-gemm/gen/ |
D | 1x8c8-minmax-neon-mlal-padal.c |
    93 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
    94 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
    95 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
    127 const int16x8_t vprod0x6 = vmull_s8(vb6, va0); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
    128 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
|
D | 1x8c16-minmax-neon-mlal-padal.c |
    85 int16x8_t vprod0x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal() local
    86 vprod0x6 = vmlal_s8(vprod0x6, vget_high_s8(vb6), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
    87 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
|
D | 2x8c8-minmax-neon-mlal-padal.c |
    127 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
    129 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
    131 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
    180 const int16x8_t vprod0x6 = vmull_s8(vb6, va0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
    182 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
|
D | 1x16c8-minmax-neon-mlal-padal.c |
    109 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
    110 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
    111 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
    175 const int16x8_t vprod0x6 = vmull_s8(vb6, va0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
    176 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
|
D | 3x8c8-minmax-neon-mlal-padal.c |
    161 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
    164 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
    167 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
    233 const int16x8_t vprod0x6 = vmull_s8(vb6, va0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
    236 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
|
D | 2x8c16-minmax-neon-mlal-padal.c |
    118 int16x8_t vprod0x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
    120 vprod0x6 = vmlal_s8(vprod0x6, vget_high_s8(vb6), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
    122 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
|
D | 1x16c16-minmax-neon-mlal-padal.c |
    101 int16x8_t vprod0x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() local
    102 vprod0x6 = vmlal_s8(vprod0x6, vget_high_s8(vb6), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
    103 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
|
D | 1x8c8-minmax-neon-mull-padal.c |
    78 const int16x8_t vprod0x6 = vmull_s8(vb6, va0); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal() local
    79 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal()
|
D | 3x8c16-minmax-neon-mlal-padal.c |
    151 int16x8_t vprod0x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
    154 vprod0x6 = vmlal_s8(vprod0x6, vget_high_s8(vb6), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
    157 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
|
D | 4x8c8-minmax-neon-mlal-padal.c |
    195 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
    199 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
    203 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
    286 const int16x8_t vprod0x6 = vmull_s8(vb6, va0); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
    290 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c |
    151 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
    153 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
    155 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
    260 const int16x8_t vprod0x6 = vmull_s8(vb6, va0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
    262 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c |
    142 int16x8_t vprod0x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
    144 vprod0x6 = vmlal_s8(vprod0x6, vget_high_s8(vb6), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
    146 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
|
D | 4x8c16-minmax-neon-mlal-padal.c |
    184 int16x8_t vprod0x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
    188 vprod0x6 = vmlal_s8(vprod0x6, vget_high_s8(vb6), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
    192 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 1x8c8-minmax-neon-mlal-padal.c |
    104 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
    105 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
    106 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
    138 const int16x8_t vprod0x6 = vmull_s8(vb6, va0); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
    139 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
|
D | 1x8c16-minmax-neon-mlal-padal.c |
    96 int16x8_t vprod0x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal() local
    97 vprod0x6 = vmlal_s8(vprod0x6, vget_high_s8(vb6), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
    98 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
|
D | 2x8c8-minmax-neon-mlal-padal.c |
    140 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
    142 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
    144 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
    193 const int16x8_t vprod0x6 = vmull_s8(vb6, va0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
    195 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
|
D | 1x16c8-minmax-neon-mlal-padal.c |
    120 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
    121 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
    122 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
    186 const int16x8_t vprod0x6 = vmull_s8(vb6, va0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
    187 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
|
D | 2x8c16-minmax-neon-mlal-padal.c |
    131 int16x8_t vprod0x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
    133 vprod0x6 = vmlal_s8(vprod0x6, vget_high_s8(vb6), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
    135 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
|
D | 1x16c16-minmax-neon-mlal-padal.c |
    112 int16x8_t vprod0x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() local
    113 vprod0x6 = vmlal_s8(vprod0x6, vget_high_s8(vb6), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
    114 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
|
D | 3x8c8-minmax-neon-mlal-padal.c |
    176 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
    179 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
    182 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
    248 const int16x8_t vprod0x6 = vmull_s8(vb6, va0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
    251 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
|
D | 1x8c8-minmax-neon-mull-padal.c |
    89 const int16x8_t vprod0x6 = vmull_s8(vb6, va0); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal() local
    90 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal()
|
D | 3x8c16-minmax-neon-mlal-padal.c |
    166 int16x8_t vprod0x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
    169 vprod0x6 = vmlal_s8(vprod0x6, vget_high_s8(vb6), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
    172 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
|
D | 2x16c8-minmax-neon-mlal-padal.c |
    164 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
    166 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
    168 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
    273 const int16x8_t vprod0x6 = vmull_s8(vb6, va0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
    275 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
|
D | 4x8c8-minmax-neon-mlal-padal.c |
    212 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
    216 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
    220 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
    303 const int16x8_t vprod0x6 = vmull_s8(vb6, va0); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
    307 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
|
D | 4x8c16-minmax-neon-mlal-padal.c |
    201 int16x8_t vprod0x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
    205 vprod0x6 = vmlal_s8(vprod0x6, vget_high_s8(vb6), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
    209 vacc0x6 = vpadalq_s16(vacc0x6, vprod0x6); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
|
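Every occurrence listed above is the same three-step NEON accumulation: vmull_s8 forms widening int8 x int8 -> int16 products, vmlal_s8 folds a second set of products into the same int16 lanes, and vpadalq_s16 pairwise-adds adjacent int16 lanes into the int32 accumulator (vacc0x6 in these kernels). The c16 variants split one 16-byte register with vget_low_s8/vget_high_s8, while the c8 mlal variants use two separate 8-byte registers (va0x0/va0x1, vb6x0/vb6x1). The sketch below is a minimal, hedged illustration of one such step for a 16-element slice; the helper name acc_step_c16 and the test data are hypothetical and not taken from XNNPACK.

    #include <arm_neon.h>
    #include <stdio.h>

    /* Hypothetical helper (not from XNNPACK): one vmull/vmlal/vpadal step over
     * a 16-element slice of A against one 16-element column slice of B. */
    static inline int32x4_t acc_step_c16(int32x4_t vacc, int8x16_t va, int8x16_t vb) {
      /* Widening multiply of the low 8 lanes: int8 x int8 -> int16 products. */
      int16x8_t vprod = vmull_s8(vget_low_s8(vb), vget_low_s8(va));
      /* Multiply-accumulate the high 8 lanes into the same int16 lanes.
       * Note: two int8 products can reach 2 * 16384 = 32768, so this int16
       * intermediate can overflow when both operands can be -128. */
      vprod = vmlal_s8(vprod, vget_high_s8(vb), vget_high_s8(va));
      /* Pairwise add adjacent int16 lanes and accumulate into int32 lanes. */
      return vpadalq_s16(vacc, vprod);
    }

    #if defined(__aarch64__)
    int main(void) {
      int8_t a[16], b[16];
      int32_t ref = 0;
      for (int i = 0; i < 16; i++) {
        a[i] = (int8_t) (i - 8);
        b[i] = (int8_t) (3 * i - 20);
        ref += (int32_t) a[i] * (int32_t) b[i];
      }
      int32x4_t vacc = vdupq_n_s32(0);
      vacc = acc_step_c16(vacc, vld1q_s8(a), vld1q_s8(b));
      /* Each int32 lane now holds a partial sum of four a[i]*b[i] products;
       * reducing the four lanes yields the full 16-element dot product. */
      printf("neon=%d ref=%d\n", (int) vaddvq_s32(vacc), ref);
      return 0;
    }
    #endif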