Searched refs:vzero_shift_mask (Results 1 – 7 of 7) sorted by relevance

/external/XNNPACK/src/q8-vadd/
neon.c
28 const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0))); in xnn_q8_vadd_ukernel__neon() local
68 vacc0_lo = vsraq_n_s32(vacc0_lo, vbicq_s32(vacc0_lo, vzero_shift_mask), 31); in xnn_q8_vadd_ukernel__neon()
69 vacc1_lo = vsraq_n_s32(vacc1_lo, vbicq_s32(vacc1_lo, vzero_shift_mask), 31); in xnn_q8_vadd_ukernel__neon()
70 vacc2_lo = vsraq_n_s32(vacc2_lo, vbicq_s32(vacc2_lo, vzero_shift_mask), 31); in xnn_q8_vadd_ukernel__neon()
71 vacc3_lo = vsraq_n_s32(vacc3_lo, vbicq_s32(vacc3_lo, vzero_shift_mask), 31); in xnn_q8_vadd_ukernel__neon()
72 vacc0_hi = vsraq_n_s32(vacc0_hi, vbicq_s32(vacc0_hi, vzero_shift_mask), 31); in xnn_q8_vadd_ukernel__neon()
73 vacc1_hi = vsraq_n_s32(vacc1_hi, vbicq_s32(vacc1_hi, vzero_shift_mask), 31); in xnn_q8_vadd_ukernel__neon()
74 vacc2_hi = vsraq_n_s32(vacc2_hi, vbicq_s32(vacc2_hi, vzero_shift_mask), 31); in xnn_q8_vadd_ukernel__neon()
75 vacc3_hi = vsraq_n_s32(vacc3_hi, vbicq_s32(vacc3_hi, vzero_shift_mask), 31); in xnn_q8_vadd_ukernel__neon()
129 vacc0_lo = vsraq_n_s32(vacc0_lo, vbicq_s32(vacc0_lo, vzero_shift_mask), 31); in xnn_q8_vadd_ukernel__neon()
[all …]
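
Across all seven results the pattern is the same: vzero_shift_mask has all-ones lanes exactly where vright_shift is zero, and vbicq_s32 uses it to suppress the rounding fixup where the shift is already exact. The vsraq_n_s32(..., 31) adds each accumulator's sign bit (subtracting 1 from negative lanes), which turns the round-half-up behavior of the vrshlq_s32 that follows in the full kernels (truncated from these hits) into round-half-away-from-zero. Below is a minimal self-contained sketch of the idiom, not XNNPACK code; the function name and demo values are illustrative.

#include <arm_neon.h>
#include <stdio.h>

/* Rounding arithmetic right shift of four int32 lanes by `shift` bits,
 * rounding ties away from zero -- the same sequence the kernels use. */
static int32x4_t rounding_right_shift(int32x4_t vacc, int32_t shift) {
  const int32x4_t vright_shift = vmovq_n_s32(-shift);
  /* All-ones lanes where the shift amount is zero (shift already exact). */
  const int32x4_t vzero_shift_mask =
      vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
  /* Add the sign bit (i.e. subtract 1 from negative lanes) unless the
   * shift is zero, so the rounding shift below rounds ties away from 0. */
  vacc = vsraq_n_s32(vacc, vbicq_s32(vacc, vzero_shift_mask), 31);
  /* vrshlq_s32 with a negative count is a rounding right shift. */
  return vrshlq_s32(vacc, vright_shift);
}

int main(void) {
  const int32_t in[4] = { 5, -5, 6, -6 };
  const int32x4_t vout = rounding_right_shift(vld1q_s32(in), 2);
  /* 5/4 -> 1, -5/4 -> -1, and the ties 6/4 -> 2, -6/4 -> -2. */
  printf("%d %d %d %d\n",
         vgetq_lane_s32(vout, 0), vgetq_lane_s32(vout, 1),
         vgetq_lane_s32(vout, 2), vgetq_lane_s32(vout, 3));
  return 0;
}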
/external/XNNPACK/src/q8-dwconv/
up8x9-neon.c
186 … const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0))); in xnn_q8_dwconv_ukernel_up8x9__neon() local
187 vacc0_lo = vsraq_n_s32(vacc0_lo, vbicq_s32(vacc0_lo, vzero_shift_mask), 31); in xnn_q8_dwconv_ukernel_up8x9__neon()
188 vacc0_hi = vsraq_n_s32(vacc0_hi, vbicq_s32(vacc0_hi, vzero_shift_mask), 31); in xnn_q8_dwconv_ukernel_up8x9__neon()
189 vacc1_lo = vsraq_n_s32(vacc1_lo, vbicq_s32(vacc1_lo, vzero_shift_mask), 31); in xnn_q8_dwconv_ukernel_up8x9__neon()
190 vacc1_hi = vsraq_n_s32(vacc1_hi, vbicq_s32(vacc1_hi, vzero_shift_mask), 31); in xnn_q8_dwconv_ukernel_up8x9__neon()
191 vacc2_lo = vsraq_n_s32(vacc2_lo, vbicq_s32(vacc2_lo, vzero_shift_mask), 31); in xnn_q8_dwconv_ukernel_up8x9__neon()
192 vacc2_hi = vsraq_n_s32(vacc2_hi, vbicq_s32(vacc2_hi, vzero_shift_mask), 31); in xnn_q8_dwconv_ukernel_up8x9__neon()
344 … const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0))); in xnn_q8_dwconv_ukernel_up8x9__neon() local
345 vacc0_lo = vsraq_n_s32(vacc0_lo, vbicq_s32(vacc0_lo, vzero_shift_mask), 31); in xnn_q8_dwconv_ukernel_up8x9__neon()
346 vacc0_hi = vsraq_n_s32(vacc0_hi, vbicq_s32(vacc0_hi, vzero_shift_mask), 31); in xnn_q8_dwconv_ukernel_up8x9__neon()
[all …]
up8x9-aarch32-neon.S
61 # Compute vzero_shift_mask
62 # - q11 = vzero_shift_mask
/external/XNNPACK/src/q8-igemm/
8x8-neon.c
515 … const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0))); in xnn_q8_igemm_ukernel_8x8__neon() local
516 vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31); in xnn_q8_igemm_ukernel_8x8__neon()
517 vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31); in xnn_q8_igemm_ukernel_8x8__neon()
518 vacc1x0123 = vsraq_n_s32(vacc1x0123, vbicq_s32(vacc1x0123, vzero_shift_mask), 31); in xnn_q8_igemm_ukernel_8x8__neon()
519 vacc1x4567 = vsraq_n_s32(vacc1x4567, vbicq_s32(vacc1x4567, vzero_shift_mask), 31); in xnn_q8_igemm_ukernel_8x8__neon()
520 vacc2x0123 = vsraq_n_s32(vacc2x0123, vbicq_s32(vacc2x0123, vzero_shift_mask), 31); in xnn_q8_igemm_ukernel_8x8__neon()
521 vacc2x4567 = vsraq_n_s32(vacc2x4567, vbicq_s32(vacc2x4567, vzero_shift_mask), 31); in xnn_q8_igemm_ukernel_8x8__neon()
522 vacc3x0123 = vsraq_n_s32(vacc3x0123, vbicq_s32(vacc3x0123, vzero_shift_mask), 31); in xnn_q8_igemm_ukernel_8x8__neon()
523 vacc3x4567 = vsraq_n_s32(vacc3x4567, vbicq_s32(vacc3x4567, vzero_shift_mask), 31); in xnn_q8_igemm_ukernel_8x8__neon()
524 vacc4x0123 = vsraq_n_s32(vacc4x0123, vbicq_s32(vacc4x0123, vzero_shift_mask), 31); in xnn_q8_igemm_ukernel_8x8__neon()
[all …]
4x8-neon.c
331 … const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0))); in xnn_q8_igemm_ukernel_4x8__neon() local
332 vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31); in xnn_q8_igemm_ukernel_4x8__neon()
333 vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31); in xnn_q8_igemm_ukernel_4x8__neon()
334 vacc1x0123 = vsraq_n_s32(vacc1x0123, vbicq_s32(vacc1x0123, vzero_shift_mask), 31); in xnn_q8_igemm_ukernel_4x8__neon()
335 vacc1x4567 = vsraq_n_s32(vacc1x4567, vbicq_s32(vacc1x4567, vzero_shift_mask), 31); in xnn_q8_igemm_ukernel_4x8__neon()
336 vacc2x0123 = vsraq_n_s32(vacc2x0123, vbicq_s32(vacc2x0123, vzero_shift_mask), 31); in xnn_q8_igemm_ukernel_4x8__neon()
337 vacc2x4567 = vsraq_n_s32(vacc2x4567, vbicq_s32(vacc2x4567, vzero_shift_mask), 31); in xnn_q8_igemm_ukernel_4x8__neon()
338 vacc3x0123 = vsraq_n_s32(vacc3x0123, vbicq_s32(vacc3x0123, vzero_shift_mask), 31); in xnn_q8_igemm_ukernel_4x8__neon()
339 vacc3x4567 = vsraq_n_s32(vacc3x4567, vbicq_s32(vacc3x4567, vzero_shift_mask), 31); in xnn_q8_igemm_ukernel_4x8__neon()
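
A scalar reference for the same operation is useful when checking any of these kernels. This is a sketch assuming an arithmetic right shift for negative int32_t values (implementation-defined in C, but what the targeted compilers do); scalar_rounding_right_shift is an illustrative name, not an XNNPACK function.

#include <stdint.h>

/* Scalar counterpart of the vzero_shift_mask + vrshlq_s32 sequence:
 * divide by 2^shift, rounding ties away from zero. */
static int32_t scalar_rounding_right_shift(int32_t acc, uint32_t shift) {
  if (shift == 0) {
    return acc;  /* the masked-off case: no rounding adjustment */
  }
  acc += acc >> 31;  /* subtract 1 from negative values (the vsraq step) */
  const int64_t rounding = INT64_C(1) << (shift - 1);
  return (int32_t) (((int64_t) acc + rounding) >> shift);
}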
/external/XNNPACK/src/q8-gemm/
8x8-neon.c
470 … const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0))); in xnn_q8_gemm_ukernel_8x8__neon() local
471 vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31); in xnn_q8_gemm_ukernel_8x8__neon()
472 vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31); in xnn_q8_gemm_ukernel_8x8__neon()
473 vacc1x0123 = vsraq_n_s32(vacc1x0123, vbicq_s32(vacc1x0123, vzero_shift_mask), 31); in xnn_q8_gemm_ukernel_8x8__neon()
474 vacc1x4567 = vsraq_n_s32(vacc1x4567, vbicq_s32(vacc1x4567, vzero_shift_mask), 31); in xnn_q8_gemm_ukernel_8x8__neon()
475 vacc2x0123 = vsraq_n_s32(vacc2x0123, vbicq_s32(vacc2x0123, vzero_shift_mask), 31); in xnn_q8_gemm_ukernel_8x8__neon()
476 vacc2x4567 = vsraq_n_s32(vacc2x4567, vbicq_s32(vacc2x4567, vzero_shift_mask), 31); in xnn_q8_gemm_ukernel_8x8__neon()
477 vacc3x0123 = vsraq_n_s32(vacc3x0123, vbicq_s32(vacc3x0123, vzero_shift_mask), 31); in xnn_q8_gemm_ukernel_8x8__neon()
478 vacc3x4567 = vsraq_n_s32(vacc3x4567, vbicq_s32(vacc3x4567, vzero_shift_mask), 31); in xnn_q8_gemm_ukernel_8x8__neon()
479 vacc4x0123 = vsraq_n_s32(vacc4x0123, vbicq_s32(vacc4x0123, vzero_shift_mask), 31); in xnn_q8_gemm_ukernel_8x8__neon()
[all …]
4x8-neon.c
294 … const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0))); in xnn_q8_gemm_ukernel_4x8__neon() local
295 vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31); in xnn_q8_gemm_ukernel_4x8__neon()
296 vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31); in xnn_q8_gemm_ukernel_4x8__neon()
297 vacc1x0123 = vsraq_n_s32(vacc1x0123, vbicq_s32(vacc1x0123, vzero_shift_mask), 31); in xnn_q8_gemm_ukernel_4x8__neon()
298 vacc1x4567 = vsraq_n_s32(vacc1x4567, vbicq_s32(vacc1x4567, vzero_shift_mask), 31); in xnn_q8_gemm_ukernel_4x8__neon()
299 vacc2x0123 = vsraq_n_s32(vacc2x0123, vbicq_s32(vacc2x0123, vzero_shift_mask), 31); in xnn_q8_gemm_ukernel_4x8__neon()
300 vacc2x4567 = vsraq_n_s32(vacc2x4567, vbicq_s32(vacc2x4567, vzero_shift_mask), 31); in xnn_q8_gemm_ukernel_4x8__neon()
301 vacc3x0123 = vsraq_n_s32(vacc3x0123, vbicq_s32(vacc3x0123, vzero_shift_mask), 31); in xnn_q8_gemm_ukernel_4x8__neon()
302 vacc3x4567 = vsraq_n_s32(vacc3x4567, vbicq_s32(vacc3x4567, vzero_shift_mask), 31); in xnn_q8_gemm_ukernel_4x8__neon()
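
For context, the mask fixup is the middle step of the fixed-point requantization these GEMM/IGEMM kernels share: each int32 accumulator is scaled by a Q31 multiplier with vqrdmulhq_s32, adjusted as in the hits above, rounding-shifted, offset by the output zero point, and saturated down to uint8. The following is a hedged end-to-end sketch; vmultiplier, vright_shift, and voutput_zero_point mirror the kernels' parameter names, while requantize_q8 and its packaging are illustrative rather than the XNNPACK API.

#include <arm_neon.h>

/* Requantize eight int32 accumulators to uint8, following the sequence the
 * q8 GEMM/IGEMM kernels above apply to each accumulator register. */
static uint8x8_t requantize_q8(int32x4_t vacc_lo, int32x4_t vacc_hi,
                               int32x4_t vmultiplier, int32x4_t vright_shift,
                               int16x8_t voutput_zero_point) {
  /* Fixed-point multiply by a Q31 scale (rounding, doubling high half). */
  vacc_lo = vqrdmulhq_s32(vacc_lo, vmultiplier);
  vacc_hi = vqrdmulhq_s32(vacc_hi, vmultiplier);
  /* The vzero_shift_mask fixup, exactly as in the search hits above. */
  const int32x4_t vzero_shift_mask =
      vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
  vacc_lo = vsraq_n_s32(vacc_lo, vbicq_s32(vacc_lo, vzero_shift_mask), 31);
  vacc_hi = vsraq_n_s32(vacc_hi, vbicq_s32(vacc_hi, vzero_shift_mask), 31);
  /* Rounding right shift (vright_shift holds a non-positive count). */
  vacc_lo = vrshlq_s32(vacc_lo, vright_shift);
  vacc_hi = vrshlq_s32(vacc_hi, vright_shift);
  /* Narrow to int16, add the output zero point with saturation, then
   * narrow to uint8 with saturation. */
  const int16x8_t vacc = vqaddq_s16(
      vcombine_s16(vqmovn_s32(vacc_lo), vqmovn_s32(vacc_hi)),
      voutput_zero_point);
  return vqmovun_s16(vacc);
}

The real kernels finish with one more step not shown here: a clamp of the uint8 result against the output activation bounds.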