/external/XNNPACK/src/q8-vadd/
D | neon.c | all matches in xnn_q8_vadd_ukernel__neon():
    28   const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));  (local)
    68   vacc0_lo = vsraq_n_s32(vacc0_lo, vbicq_s32(vacc0_lo, vzero_shift_mask), 31);
    69   vacc1_lo = vsraq_n_s32(vacc1_lo, vbicq_s32(vacc1_lo, vzero_shift_mask), 31);
    70   vacc2_lo = vsraq_n_s32(vacc2_lo, vbicq_s32(vacc2_lo, vzero_shift_mask), 31);
    71   vacc3_lo = vsraq_n_s32(vacc3_lo, vbicq_s32(vacc3_lo, vzero_shift_mask), 31);
    72   vacc0_hi = vsraq_n_s32(vacc0_hi, vbicq_s32(vacc0_hi, vzero_shift_mask), 31);
    73   vacc1_hi = vsraq_n_s32(vacc1_hi, vbicq_s32(vacc1_hi, vzero_shift_mask), 31);
    74   vacc2_hi = vsraq_n_s32(vacc2_hi, vbicq_s32(vacc2_hi, vzero_shift_mask), 31);
    75   vacc3_hi = vsraq_n_s32(vacc3_hi, vbicq_s32(vacc3_hi, vzero_shift_mask), 31);
    129  vacc0_lo = vsraq_n_s32(vacc0_lo, vbicq_s32(vacc0_lo, vzero_shift_mask), 31);
    [all …]
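Every hit above is the same requantization idiom. The accumulator must be shifted right by the kernel's shift amount, rounding to nearest with ties away from zero, but VRSHL (vrshlq_s32 with a negative shift count) rounds ties toward positive infinity. Decrementing negative accumulators by 1 beforehand converts one rounding into the other, except when the shift is zero and no rounding happens at all; vzero_shift_mask, all-ones in lanes whose shift count is zero, suppresses the decrement in exactly those lanes. A minimal self-contained sketch of the idiom (the function name is invented for illustration; this is not the XNNPACK kernel itself):

    #include <arm_neon.h>

    /* Sketch only, not XNNPACK code. vright_shift holds the shift count
     * negated (<= 0), as vrshlq_s32 expects. */
    static inline int32x4_t requantize_rounding_shift(int32x4_t vacc,
                                                      int32x4_t vright_shift) {
      /* All-ones in lanes whose shift count is zero. */
      const int32x4_t vzero_shift_mask =
          vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
      /* vbicq_s32 zeroes the lanes that will not be shifted; vsraq_n_s32 then
       * adds (lane >> 31), i.e. -1 for negative lanes that will be shifted. */
      vacc = vsraq_n_s32(vacc, vbicq_s32(vacc, vzero_shift_mask), 31);
      /* VRSHL by a negative count: rounding right shift; the decrement above
       * turned its ties-toward-+inf behavior into ties-away-from-zero. */
      return vrshlq_s32(vacc, vright_shift);
    }

The same three-intrinsic sequence recurs in the dwconv, igemm, and gemm kernels below.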
/external/XNNPACK/src/q8-dwconv/
D | up8x9-neon.c | all matches in xnn_q8_dwconv_ukernel_up8x9__neon():
    186  … const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));  (local)
    187  vacc0_lo = vsraq_n_s32(vacc0_lo, vbicq_s32(vacc0_lo, vzero_shift_mask), 31);
    188  vacc0_hi = vsraq_n_s32(vacc0_hi, vbicq_s32(vacc0_hi, vzero_shift_mask), 31);
    189  vacc1_lo = vsraq_n_s32(vacc1_lo, vbicq_s32(vacc1_lo, vzero_shift_mask), 31);
    190  vacc1_hi = vsraq_n_s32(vacc1_hi, vbicq_s32(vacc1_hi, vzero_shift_mask), 31);
    191  vacc2_lo = vsraq_n_s32(vacc2_lo, vbicq_s32(vacc2_lo, vzero_shift_mask), 31);
    192  vacc2_hi = vsraq_n_s32(vacc2_hi, vbicq_s32(vacc2_hi, vzero_shift_mask), 31);
    344  … const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));  (local)
    345  vacc0_lo = vsraq_n_s32(vacc0_lo, vbicq_s32(vacc0_lo, vzero_shift_mask), 31);
    346  vacc0_hi = vsraq_n_s32(vacc0_hi, vbicq_s32(vacc0_hi, vzero_shift_mask), 31);
    [all …]
D | up8x9-aarch32-neon.S |
    61   # Compute vzero_shift_mask
    62   # - q11 = vzero_shift_mask
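The hand-written AArch32 kernel forms the same mask with a single vector compare against zero. A hypothetical sketch; the file only confirms q11 as the destination, and the source register here is an assumption:

    # Assuming the vector of right-shift counts is in q12 (an assumption,
    # not taken from the file):
    VCEQ.I32 q11, q12, #0
    # q11 = vzero_shift_mask: all-ones lanes where the shift count is zero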
/external/XNNPACK/src/q8-igemm/
D | 8x8-neon.c | all matches in xnn_q8_igemm_ukernel_8x8__neon():
    515  … const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));  (local)
    516  vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
    517  vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
    518  vacc1x0123 = vsraq_n_s32(vacc1x0123, vbicq_s32(vacc1x0123, vzero_shift_mask), 31);
    519  vacc1x4567 = vsraq_n_s32(vacc1x4567, vbicq_s32(vacc1x4567, vzero_shift_mask), 31);
    520  vacc2x0123 = vsraq_n_s32(vacc2x0123, vbicq_s32(vacc2x0123, vzero_shift_mask), 31);
    521  vacc2x4567 = vsraq_n_s32(vacc2x4567, vbicq_s32(vacc2x4567, vzero_shift_mask), 31);
    522  vacc3x0123 = vsraq_n_s32(vacc3x0123, vbicq_s32(vacc3x0123, vzero_shift_mask), 31);
    523  vacc3x4567 = vsraq_n_s32(vacc3x4567, vbicq_s32(vacc3x4567, vzero_shift_mask), 31);
    524  vacc4x0123 = vsraq_n_s32(vacc4x0123, vbicq_s32(vacc4x0123, vzero_shift_mask), 31);
    [all …]
D | 4x8-neon.c | all matches in xnn_q8_igemm_ukernel_4x8__neon():
    331  … const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));  (local)
    332  vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
    333  vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
    334  vacc1x0123 = vsraq_n_s32(vacc1x0123, vbicq_s32(vacc1x0123, vzero_shift_mask), 31);
    335  vacc1x4567 = vsraq_n_s32(vacc1x4567, vbicq_s32(vacc1x4567, vzero_shift_mask), 31);
    336  vacc2x0123 = vsraq_n_s32(vacc2x0123, vbicq_s32(vacc2x0123, vzero_shift_mask), 31);
    337  vacc2x4567 = vsraq_n_s32(vacc2x4567, vbicq_s32(vacc2x4567, vzero_shift_mask), 31);
    338  vacc3x0123 = vsraq_n_s32(vacc3x0123, vbicq_s32(vacc3x0123, vzero_shift_mask), 31);
    339  vacc3x4567 = vsraq_n_s32(vacc3x4567, vbicq_s32(vacc3x4567, vzero_shift_mask), 31);
/external/XNNPACK/src/q8-gemm/
D | 8x8-neon.c | all matches in xnn_q8_gemm_ukernel_8x8__neon():
    470  … const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));  (local)
    471  vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
    472  vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
    473  vacc1x0123 = vsraq_n_s32(vacc1x0123, vbicq_s32(vacc1x0123, vzero_shift_mask), 31);
    474  vacc1x4567 = vsraq_n_s32(vacc1x4567, vbicq_s32(vacc1x4567, vzero_shift_mask), 31);
    475  vacc2x0123 = vsraq_n_s32(vacc2x0123, vbicq_s32(vacc2x0123, vzero_shift_mask), 31);
    476  vacc2x4567 = vsraq_n_s32(vacc2x4567, vbicq_s32(vacc2x4567, vzero_shift_mask), 31);
    477  vacc3x0123 = vsraq_n_s32(vacc3x0123, vbicq_s32(vacc3x0123, vzero_shift_mask), 31);
    478  vacc3x4567 = vsraq_n_s32(vacc3x4567, vbicq_s32(vacc3x4567, vzero_shift_mask), 31);
    479  vacc4x0123 = vsraq_n_s32(vacc4x0123, vbicq_s32(vacc4x0123, vzero_shift_mask), 31);
    [all …]
D | 4x8-neon.c | all matches in xnn_q8_gemm_ukernel_4x8__neon():
    294  … const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));  (local)
    295  vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
    296  vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
    297  vacc1x0123 = vsraq_n_s32(vacc1x0123, vbicq_s32(vacc1x0123, vzero_shift_mask), 31);
    298  vacc1x4567 = vsraq_n_s32(vacc1x4567, vbicq_s32(vacc1x4567, vzero_shift_mask), 31);
    299  vacc2x0123 = vsraq_n_s32(vacc2x0123, vbicq_s32(vacc2x0123, vzero_shift_mask), 31);
    300  vacc2x4567 = vsraq_n_s32(vacc2x4567, vbicq_s32(vacc2x4567, vzero_shift_mask), 31);
    301  vacc3x0123 = vsraq_n_s32(vacc3x0123, vbicq_s32(vacc3x0123, vzero_shift_mask), 31);
    302  vacc3x4567 = vsraq_n_s32(vacc3x4567, vbicq_s32(vacc3x4567, vzero_shift_mask), 31);
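For reference, a plain-C model of what the vsraq_n_s32/vrshlq_s32 pair computes per lane. This is a sketch, not XNNPACK's scalar requantization path, and it assumes arithmetic right shift of negative integers, which holds on the targets these kernels run on:

    #include <stdint.h>

    /* round(acc / 2^shift) with ties away from zero,
     * e.g. shift = 1: -5 -> -3 and +5 -> +3. */
    static inline int32_t rounding_right_shift_scalar(int32_t acc, uint32_t shift) {
      if (shift == 0) {
        return acc;  /* the vzero_shift_mask case: no rounding, no adjustment */
      }
      if (acc < 0) {
        acc -= 1;    /* the vsraq_n_s32(vacc, vbicq_s32(...), 31) step */
      }
      /* the VRSHL step: add half of the divisor, then shift right */
      return (int32_t) (((int64_t) acc + (INT64_C(1) << (shift - 1))) >> shift);
    }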