/external/XNNPACK/src/qs8-igemm/gen/ |
D | 8x16c4-minmax-neondot.c | 395 …const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), v… in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 396 …const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), v… in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 397 …const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), v… in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 398 …const int16x8_t vacc1x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x89AB), vacc1xCDEF), v… in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 399 …const int16x8_t vacc2x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc2x0123), vacc2x4567), v… in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 400 …const int16x8_t vacc2x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc2x89AB), vacc2xCDEF), v… in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 401 …const int16x8_t vacc3x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc3x0123), vacc3x4567), v… in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 402 …const int16x8_t vacc3x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc3x89AB), vacc3xCDEF), v… in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 403 …const int16x8_t vacc4x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc4x0123), vacc4x4567), v… in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 404 …const int16x8_t vacc4x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc4x89AB), vacc4xCDEF), v… in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() [all …]
|
D | 6x16c4-minmax-neondot.c | 319 …const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), v… in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 320 …const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), v… in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 321 …const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), v… in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 322 …const int16x8_t vacc1x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x89AB), vacc1xCDEF), v… in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 323 …const int16x8_t vacc2x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc2x0123), vacc2x4567), v… in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 324 …const int16x8_t vacc2x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc2x89AB), vacc2xCDEF), v… in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 325 …const int16x8_t vacc3x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc3x0123), vacc3x4567), v… in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 326 …const int16x8_t vacc3x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc3x89AB), vacc3xCDEF), v… in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 327 …const int16x8_t vacc4x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc4x0123), vacc4x4567), v… in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 328 …const int16x8_t vacc4x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc4x89AB), vacc4xCDEF), v… in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() [all …]
|
D | 4x16c4-minmax-neondot.c | 243 …const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), v… in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 244 …const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), v… in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 245 …const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), v… in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 246 …const int16x8_t vacc1x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x89AB), vacc1xCDEF), v… in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 247 …const int16x8_t vacc2x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc2x0123), vacc2x4567), v… in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 248 …const int16x8_t vacc2x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc2x89AB), vacc2xCDEF), v… in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 249 …const int16x8_t vacc3x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc3x0123), vacc3x4567), v… in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 250 …const int16x8_t vacc3x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc3x89AB), vacc3xCDEF), v… in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 257 …const int16x8_t vacc0x01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0x0123), vqmovn_s32(vacc0x… in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 258 …const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0x89AB), vqmovn_s32(vacc0x… in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() [all …]
|
D | 8x8c4-minmax-neondot.c | 277 …const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), v… in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() 278 …const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), v… in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() 279 …const int16x8_t vacc2x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc2x0123), vacc2x4567), v… in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() 280 …const int16x8_t vacc3x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc3x0123), vacc3x4567), v… in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() 281 …const int16x8_t vacc4x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc4x0123), vacc4x4567), v… in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() 282 …const int16x8_t vacc5x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc5x0123), vacc5x4567), v… in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() 283 …const int16x8_t vacc6x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc6x0123), vacc6x4567), v… in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() 284 …const int16x8_t vacc7x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc7x0123), vacc7x4567), v… in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() 291 …const int16x8_t vacc0x01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0x0123), vqmovn_s32(vacc0x… in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() 292 …const int16x8_t vacc1x01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc1x0123), vqmovn_s32(vacc1x… in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() [all …]
|
D | 6x8c4-minmax-neondot.c | 229 …const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), v… in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() 230 …const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), v… in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() 231 …const int16x8_t vacc2x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc2x0123), vacc2x4567), v… in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() 232 …const int16x8_t vacc3x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc3x0123), vacc3x4567), v… in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() 233 …const int16x8_t vacc4x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc4x0123), vacc4x4567), v… in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() 234 …const int16x8_t vacc5x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc5x0123), vacc5x4567), v… in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() 240 …const int16x8_t vacc0x01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0x0123), vqmovn_s32(vacc0x… in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() 241 …const int16x8_t vacc1x01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc1x0123), vqmovn_s32(vacc1x… in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() 242 …const int16x8_t vacc2x01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc2x0123), vqmovn_s32(vacc2x… in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() 243 …const int16x8_t vacc3x01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc3x0123), vqmovn_s32(vacc3x… in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() [all …]
|
D | 4x8c4-minmax-neondot.c | 181 …const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), v… in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot() 182 …const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), v… in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot() 183 …const int16x8_t vacc2x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc2x0123), vacc2x4567), v… in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot() 184 …const int16x8_t vacc3x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc3x0123), vacc3x4567), v… in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot() 189 …const int16x8_t vacc0x01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0x0123), vqmovn_s32(vacc0x… in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot() 190 …const int16x8_t vacc1x01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc1x0123), vqmovn_s32(vacc1x… in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot() 191 …const int16x8_t vacc2x01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc2x0123), vqmovn_s32(vacc2x… in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot() 192 …const int16x8_t vacc3x01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc3x0123), vqmovn_s32(vacc3x… in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 8x16c4-minmax-neondot.c | 371 …const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), v… in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 372 …const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), v… in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 373 …const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), v… in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 374 …const int16x8_t vacc1x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x89AB), vacc1xCDEF), v… in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 375 …const int16x8_t vacc2x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc2x0123), vacc2x4567), v… in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 376 …const int16x8_t vacc2x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc2x89AB), vacc2xCDEF), v… in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 377 …const int16x8_t vacc3x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc3x0123), vacc3x4567), v… in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 378 …const int16x8_t vacc3x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc3x89AB), vacc3xCDEF), v… in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 379 …const int16x8_t vacc4x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc4x0123), vacc4x4567), v… in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 380 …const int16x8_t vacc4x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc4x89AB), vacc4xCDEF), v… in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() [all …]
|
D | 6x16c4-minmax-neondot.c | 299 …const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), v… in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 300 …const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), v… in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 301 …const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), v… in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 302 …const int16x8_t vacc1x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x89AB), vacc1xCDEF), v… in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 303 …const int16x8_t vacc2x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc2x0123), vacc2x4567), v… in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 304 …const int16x8_t vacc2x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc2x89AB), vacc2xCDEF), v… in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 305 …const int16x8_t vacc3x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc3x0123), vacc3x4567), v… in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 306 …const int16x8_t vacc3x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc3x89AB), vacc3xCDEF), v… in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 307 …const int16x8_t vacc4x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc4x0123), vacc4x4567), v… in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 308 …const int16x8_t vacc4x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc4x89AB), vacc4xCDEF), v… in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() [all …]
|
D | 4x16c4-minmax-neondot.c | 227 …const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), v… in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 228 …const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), v… in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 229 …const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), v… in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 230 …const int16x8_t vacc1x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x89AB), vacc1xCDEF), v… in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 231 …const int16x8_t vacc2x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc2x0123), vacc2x4567), v… in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 232 …const int16x8_t vacc2x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc2x89AB), vacc2xCDEF), v… in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 233 …const int16x8_t vacc3x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc3x0123), vacc3x4567), v… in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 234 …const int16x8_t vacc3x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc3x89AB), vacc3xCDEF), v… in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 241 …const int16x8_t vacc0x01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0x0123), vqmovn_s32(vacc0x… in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 242 …const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0x89AB), vqmovn_s32(vacc0x… in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() [all …]
|
D | 8x8c4-minmax-neondot.c | 253 …const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), v… in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() 254 …const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), v… in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() 255 …const int16x8_t vacc2x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc2x0123), vacc2x4567), v… in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() 256 …const int16x8_t vacc3x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc3x0123), vacc3x4567), v… in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() 257 …const int16x8_t vacc4x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc4x0123), vacc4x4567), v… in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() 258 …const int16x8_t vacc5x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc5x0123), vacc5x4567), v… in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() 259 …const int16x8_t vacc6x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc6x0123), vacc6x4567), v… in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() 260 …const int16x8_t vacc7x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc7x0123), vacc7x4567), v… in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() 267 …const int16x8_t vacc0x01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0x0123), vqmovn_s32(vacc0x… in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() 268 …const int16x8_t vacc1x01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc1x0123), vqmovn_s32(vacc1x… in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() [all …]
|
D | 6x8c4-minmax-neondot.c | 209 …const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), v… in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() 210 …const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), v… in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() 211 …const int16x8_t vacc2x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc2x0123), vacc2x4567), v… in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() 212 …const int16x8_t vacc3x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc3x0123), vacc3x4567), v… in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() 213 …const int16x8_t vacc4x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc4x0123), vacc4x4567), v… in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() 214 …const int16x8_t vacc5x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc5x0123), vacc5x4567), v… in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() 220 …const int16x8_t vacc0x01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0x0123), vqmovn_s32(vacc0x… in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() 221 …const int16x8_t vacc1x01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc1x0123), vqmovn_s32(vacc1x… in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() 222 …const int16x8_t vacc2x01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc2x0123), vqmovn_s32(vacc2x… in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() 223 …const int16x8_t vacc3x01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc3x0123), vqmovn_s32(vacc3x… in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() [all …]
|
D | 4x8c4-minmax-neondot.c | 165 …const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), v… in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot() 166 …const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), v… in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot() 167 …const int16x8_t vacc2x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc2x0123), vacc2x4567), v… in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot() 168 …const int16x8_t vacc3x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc3x0123), vacc3x4567), v… in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot() 173 …const int16x8_t vacc0x01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0x0123), vqmovn_s32(vacc0x… in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot() 174 …const int16x8_t vacc1x01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc1x0123), vqmovn_s32(vacc1x… in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot() 175 …const int16x8_t vacc2x01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc2x0123), vqmovn_s32(vacc2x… in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot() 176 …const int16x8_t vacc3x01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc3x0123), vqmovn_s32(vacc3x… in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot()
|
D | 4x16c2-minmax-neon-mull-padal-dup.c | 417 …const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), v… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup() 418 …const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), v… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup() 419 …const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), v… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup() 420 …const int16x8_t vacc1x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x89AB), vacc1xCDEF), v… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup() 421 …const int16x8_t vacc2x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc2x0123), vacc2x4567), v… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup() 422 …const int16x8_t vacc2x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc2x89AB), vacc2xCDEF), v… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup() 423 …const int16x8_t vacc3x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc3x0123), vacc3x4567), v… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup() 424 …const int16x8_t vacc3x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc3x89AB), vacc3xCDEF), v… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup() 431 …const int16x8_t vacc0x01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0x0123), vqmovn_s32(vacc0x… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup() 432 …const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0x89AB), vqmovn_s32(vacc0x… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup() [all …]
|
/external/libhevc/common/arm/ |
D | ihevc_quant_iquant_ssd_neon_intr.c | 203 q0 = vqmovn_s32(qtmp_0); in ihevc_quant_iquant_ssd_flat_scale_mat_neon() 204 q1 = vqmovn_s32(qtmp_1); in ihevc_quant_iquant_ssd_flat_scale_mat_neon() 205 q2 = vqmovn_s32(qtmp_2); in ihevc_quant_iquant_ssd_flat_scale_mat_neon() 206 q3 = vqmovn_s32(qtmp_3); in ihevc_quant_iquant_ssd_flat_scale_mat_neon() 261 iq0 = vqmovn_s32(iqtmp_0); in ihevc_quant_iquant_ssd_flat_scale_mat_neon() 262 iq1 = vqmovn_s32(iqtmp_1); in ihevc_quant_iquant_ssd_flat_scale_mat_neon() 263 iq2 = vqmovn_s32(iqtmp_2); in ihevc_quant_iquant_ssd_flat_scale_mat_neon() 264 iq3 = vqmovn_s32(iqtmp_3); in ihevc_quant_iquant_ssd_flat_scale_mat_neon() 480 q_00 = vcombine_s16(vqmovn_s32(q_tmp0), vqmovn_s32(q_tmp1)); in ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact_neon() 481 q_01 = vcombine_s16(vqmovn_s32(q_tmp2), vqmovn_s32(q_tmp3)); in ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact_neon() [all …]
|
/external/XNNPACK/src/qs8-vaddc/gen/ |
D | minmax-neon-ld64-x32.c | 77 …const int16x8_t vacc01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0123), vqmovn_s32(vacc4567))… in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32() 78 …const int16x8_t vacc89ABCDEF = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc89AB), vqmovn_s32(vaccCDEF))… in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32() 79 …const int16x8_t vaccGHIJKLMN = vqaddq_s16(vcombine_s16(vqmovn_s32(vaccGHIJ), vqmovn_s32(vaccKLMN))… in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32() 80 …const int16x8_t vaccOPQRSTUV = vqaddq_s16(vcombine_s16(vqmovn_s32(vaccOPQR), vqmovn_s32(vaccSTUV))… in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32() 109 …const int16x8_t vacc01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0123), vqmovn_s32(vacc4567))… in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32()
|
D | minmax-neon-ld64-x24.c | 69 …const int16x8_t vacc01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0123), vqmovn_s32(vacc4567))… in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24() 70 …const int16x8_t vacc89ABCDEF = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc89AB), vqmovn_s32(vaccCDEF))… in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24() 71 …const int16x8_t vaccGHIJKLMN = vqaddq_s16(vcombine_s16(vqmovn_s32(vaccGHIJ), vqmovn_s32(vaccKLMN))… in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24() 100 …const int16x8_t vacc01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0123), vqmovn_s32(vacc4567))… in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24()
|
D | minmax-neon-ld64-x16.c | 61 …const int16x8_t vacc01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0123), vqmovn_s32(vacc4567))… in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16() 62 …const int16x8_t vacc89ABCDEF = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc89AB), vqmovn_s32(vaccCDEF))… in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16() 87 …const int16x8_t vacc01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0123), vqmovn_s32(vacc4567))… in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16()
|
/external/XNNPACK/src/qu8-vadd/ |
D | minmax-neon.c | 87 …const int16x8_t vacc0 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0_lo), vacc0_hi), vy_zero_point); in xnn_qu8_vadd_minmax_ukernel__neon() 88 …const int16x8_t vacc1 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1_lo), vacc1_hi), vy_zero_point); in xnn_qu8_vadd_minmax_ukernel__neon() 89 …const int16x8_t vacc2 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc2_lo), vacc2_hi), vy_zero_point); in xnn_qu8_vadd_minmax_ukernel__neon() 90 …const int16x8_t vacc3 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc3_lo), vacc3_hi), vy_zero_point); in xnn_qu8_vadd_minmax_ukernel__neon() 140 …const int16x8_t vacc0 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0_lo), vqmovn_s32(vacc0_hi)), vy_ze… in xnn_qu8_vadd_minmax_ukernel__neon() 141 …const int16x8_t vacc1 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc1_lo), vqmovn_s32(vacc1_hi)), vy_ze… in xnn_qu8_vadd_minmax_ukernel__neon() 182 const int16x8_t vacc = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc_lo), vacc_hi), vy_zero_point); in xnn_qu8_vadd_minmax_ukernel__neon() 184 …const int16x8_t vacc = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc_lo), vqmovn_s32(vacc_hi)), vy_zero_… in xnn_qu8_vadd_minmax_ukernel__neon() 225 const int16x8_t vacc = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc_lo), vacc_hi), vy_zero_point); in xnn_qu8_vadd_minmax_ukernel__neon() 227 …const int16x8_t vacc = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc_lo), vqmovn_s32(vacc_hi)), vy_zero_… in xnn_qu8_vadd_minmax_ukernel__neon()
|
/external/XNNPACK/src/qu8-requantization/ |
D | q31-neon.c | 83 …const int16x8_t xy_packed = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(x_scaled), y_scaled), vzero_poin… in xnn_qu8_requantize_q31__neon() 84 …const int16x8_t zw_packed = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(z_scaled), w_scaled), vzero_poin… in xnn_qu8_requantize_q31__neon() 87 …const int16x8_t xy_packed = vqaddq_s16(vcombine_s16(vqmovn_s32(x_scaled), vqmovn_s32(y_scaled)), v… in xnn_qu8_requantize_q31__neon() 88 …const int16x8_t zw_packed = vqaddq_s16(vcombine_s16(vqmovn_s32(z_scaled), vqmovn_s32(w_scaled)), v… in xnn_qu8_requantize_q31__neon()
|
/external/XNNPACK/src/qs8-requantization/ |
D | q31-neon.c | 83 …const int16x8_t xy_packed = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(x_scaled), y_scaled), vzero_poin… in xnn_qs8_requantize_q31__neon() 84 …const int16x8_t zw_packed = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(z_scaled), w_scaled), vzero_poin… in xnn_qs8_requantize_q31__neon() 87 …const int16x8_t xy_packed = vqaddq_s16(vcombine_s16(vqmovn_s32(x_scaled), vqmovn_s32(y_scaled)), v… in xnn_qs8_requantize_q31__neon() 88 …const int16x8_t zw_packed = vqaddq_s16(vcombine_s16(vqmovn_s32(z_scaled), vqmovn_s32(w_scaled)), v… in xnn_qs8_requantize_q31__neon()
|
/external/XNNPACK/src/qs8-vadd/gen/ |
D | minmax-neon-ld64-x32.c | 89 …const int16x8_t vacc01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0123), vqmovn_s32(vacc4567))… in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32() 90 …const int16x8_t vacc89ABCDEF = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc89AB), vqmovn_s32(vaccCDEF))… in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32() 91 …const int16x8_t vaccGHIJKLMN = vqaddq_s16(vcombine_s16(vqmovn_s32(vaccGHIJ), vqmovn_s32(vaccKLMN))… in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32() 92 …const int16x8_t vaccOPQRSTUV = vqaddq_s16(vcombine_s16(vqmovn_s32(vaccOPQR), vqmovn_s32(vaccSTUV))… in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32() 126 …const int16x8_t vacc01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0123), vqmovn_s32(vacc4567))… in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32()
|
D | minmax-neon-ld64-x24.c | 77 …const int16x8_t vacc01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0123), vqmovn_s32(vacc4567))… in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24() 78 …const int16x8_t vacc89ABCDEF = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc89AB), vqmovn_s32(vaccCDEF))… in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24() 79 …const int16x8_t vaccGHIJKLMN = vqaddq_s16(vcombine_s16(vqmovn_s32(vaccGHIJ), vqmovn_s32(vaccKLMN))… in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24() 113 …const int16x8_t vacc01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0123), vqmovn_s32(vacc4567))… in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
|
D | minmax-neon-ld64-x16.c | 65 …const int16x8_t vacc01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0123), vqmovn_s32(vacc4567))… in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16() 66 …const int16x8_t vacc89ABCDEF = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc89AB), vqmovn_s32(vaccCDEF))… in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16() 96 …const int16x8_t vacc01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0123), vqmovn_s32(vacc4567))… in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()
|
/external/gemmlowp/internal/ |
D | output_neon.h | 38 int16x4_t res_16 = vqmovn_s32(input.reg[0]); 58 vcombine_s16(vqmovn_s32(input.reg[0]), vqmovn_s32(input.reg[1])); 77 vcombine_s16(vqmovn_s32(input.reg[0]), vqmovn_s32(input.reg[1])); 79 vcombine_s16(vqmovn_s32(input.reg[2]), vqmovn_s32(input.reg[3])); 100 res_16[i] = vcombine_s16(vqmovn_s32(input.reg[2 * i]), 101 vqmovn_s32(input.reg[2 * i + 1])); 122 int16x4_t res_16 = vqmovn_s32(input.reg[0]); 142 vcombine_s16(vqmovn_s32(input.reg[0]), vqmovn_s32(input.reg[1])); 161 vcombine_s16(vqmovn_s32(input.reg[0]), vqmovn_s32(input.reg[1])); 163 vcombine_s16(vqmovn_s32(input.reg[2]), vqmovn_s32(input.reg[3])); [all …]
|
/external/tensorflow/tensorflow/lite/kernels/internal/optimized/integer_ops/ |
D | mul.h | 108 const auto p1_narrowed = vqmovn_s32(p1); in MulElementwise() 109 const auto p2_narrowed = vqmovn_s32(p2); in MulElementwise() 110 const auto p3_narrowed = vqmovn_s32(p3); in MulElementwise() 111 const auto p4_narrowed = vqmovn_s32(p4); in MulElementwise() 201 const auto p1_narrowed = vqmovn_s32(p1); in MulSimpleBroadcast() 202 const auto p2_narrowed = vqmovn_s32(p2); in MulSimpleBroadcast() 203 const auto p3_narrowed = vqmovn_s32(p3); in MulSimpleBroadcast() 204 const auto p4_narrowed = vqmovn_s32(p4); in MulSimpleBroadcast()
|