/external/XNNPACK/src/q8-vadd/ |
D | neon.c | 68 vacc0_lo = vsraq_n_s32(vacc0_lo, vbicq_s32(vacc0_lo, vzero_shift_mask), 31); in xnn_q8_vadd_ukernel__neon() 69 vacc1_lo = vsraq_n_s32(vacc1_lo, vbicq_s32(vacc1_lo, vzero_shift_mask), 31); in xnn_q8_vadd_ukernel__neon() 70 vacc2_lo = vsraq_n_s32(vacc2_lo, vbicq_s32(vacc2_lo, vzero_shift_mask), 31); in xnn_q8_vadd_ukernel__neon() 71 vacc3_lo = vsraq_n_s32(vacc3_lo, vbicq_s32(vacc3_lo, vzero_shift_mask), 31); in xnn_q8_vadd_ukernel__neon() 72 vacc0_hi = vsraq_n_s32(vacc0_hi, vbicq_s32(vacc0_hi, vzero_shift_mask), 31); in xnn_q8_vadd_ukernel__neon() 73 vacc1_hi = vsraq_n_s32(vacc1_hi, vbicq_s32(vacc1_hi, vzero_shift_mask), 31); in xnn_q8_vadd_ukernel__neon() 74 vacc2_hi = vsraq_n_s32(vacc2_hi, vbicq_s32(vacc2_hi, vzero_shift_mask), 31); in xnn_q8_vadd_ukernel__neon() 75 vacc3_hi = vsraq_n_s32(vacc3_hi, vbicq_s32(vacc3_hi, vzero_shift_mask), 31); in xnn_q8_vadd_ukernel__neon() 129 vacc0_lo = vsraq_n_s32(vacc0_lo, vbicq_s32(vacc0_lo, vzero_shift_mask), 31); in xnn_q8_vadd_ukernel__neon() 130 vacc1_lo = vsraq_n_s32(vacc1_lo, vbicq_s32(vacc1_lo, vzero_shift_mask), 31); in xnn_q8_vadd_ukernel__neon() [all …]
|
/external/XNNPACK/src/requantization/ |
D | q31-neon.c | 71 const int32x4_t x_adjusted_product = vsraq_n_s32(x_product, vbicq_s32(x, vshift_eq_0_mask), 31); in xnn_requantize_q31__neon() 72 const int32x4_t y_adjusted_product = vsraq_n_s32(y_product, vbicq_s32(y, vshift_eq_0_mask), 31); in xnn_requantize_q31__neon() 73 const int32x4_t z_adjusted_product = vsraq_n_s32(z_product, vbicq_s32(z, vshift_eq_0_mask), 31); in xnn_requantize_q31__neon() 74 const int32x4_t w_adjusted_product = vsraq_n_s32(w_product, vbicq_s32(w, vshift_eq_0_mask), 31); in xnn_requantize_q31__neon()
|
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | neonfma-rr1-lut64-p2-div-x24.c | 86 …const int32x4_t ve0123 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn0123), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24() 87 …const int32x4_t ve4567 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn4567), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24() 88 …const int32x4_t ve89AB = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn89AB), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24() 89 …const int32x4_t veCDEF = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnCDEF), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24() 90 …const int32x4_t veGHIJ = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnGHIJ), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24() 91 …const int32x4_t veKLMN = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnKLMN), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24() 275 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24() 354 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24()
|
D | neonfma-rr1-lut2048-p1-div-x24.c | 86 …const int32x4_t ve0123 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn0123), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24() 87 …const int32x4_t ve4567 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn4567), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24() 88 …const int32x4_t ve89AB = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn89AB), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24() 89 …const int32x4_t veCDEF = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnCDEF), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24() 90 …const int32x4_t veGHIJ = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnGHIJ), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24() 91 …const int32x4_t veKLMN = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnKLMN), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24() 267 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x7FF)))… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24() 344 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x7FF)))… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24()
|
D | neonfma-rr1-lut2048-p1-div-x20.c | 83 …const int32x4_t ve0123 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn0123), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x20() 84 …const int32x4_t ve4567 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn4567), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x20() 85 …const int32x4_t ve89AB = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn89AB), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x20() 86 …const int32x4_t veCDEF = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnCDEF), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x20() 87 …const int32x4_t veGHIJ = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnGHIJ), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x20() 244 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x7FF)))… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x20() 321 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x7FF)))… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x20()
|
D | neonfma-rr1-lut2048-p1-div-x16.c | 80 …const int32x4_t ve0123 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn0123), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x16() 81 …const int32x4_t ve4567 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn4567), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x16() 82 …const int32x4_t ve89AB = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn89AB), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x16() 83 …const int32x4_t veCDEF = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnCDEF), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x16() 221 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x7FF)))… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x16() 298 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x7FF)))… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x16()
|
D | neonfma-rr1-lut64-p2-div-x16.c | 80 …const int32x4_t ve0123 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn0123), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x16() 81 …const int32x4_t ve4567 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn4567), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x16() 82 …const int32x4_t ve89AB = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn89AB), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x16() 83 …const int32x4_t veCDEF = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnCDEF), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x16() 227 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x16() 306 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x16()
|
D | neonfma-rr1-lut64-p2-div-x20.c | 83 …const int32x4_t ve0123 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn0123), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x20() 84 …const int32x4_t ve4567 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn4567), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x20() 85 …const int32x4_t ve89AB = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn89AB), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x20() 86 …const int32x4_t veCDEF = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnCDEF), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x20() 87 …const int32x4_t veGHIJ = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnGHIJ), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x20() 251 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x20() 330 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x20()
|
D | neonfma-rr1-lut64-p2-nr1recps1fma-x24.c | 86 …const int32x4_t ve0123 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn0123), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24() 87 …const int32x4_t ve4567 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn4567), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24() 88 …const int32x4_t ve89AB = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn89AB), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24() 89 …const int32x4_t veCDEF = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnCDEF), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24() 90 …const int32x4_t veGHIJ = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnGHIJ), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24() 91 …const int32x4_t veKLMN = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnKLMN), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24() 299 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24() 387 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24()
|
D | neonfma-rr1-lut2048-p1-nr2fma-x24.c | 86 …const int32x4_t ve0123 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn0123), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24() 87 …const int32x4_t ve4567 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn4567), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24() 88 …const int32x4_t ve89AB = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn89AB), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24() 89 …const int32x4_t veCDEF = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnCDEF), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24() 90 …const int32x4_t veGHIJ = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnGHIJ), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24() 91 …const int32x4_t veKLMN = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnKLMN), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24() 291 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x7FF)))… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24() 377 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x7FF)))… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24()
|
D | neonfma-rr1-lut64-p2-nr2fma-x24.c | 86 …const int32x4_t ve0123 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn0123), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24() 87 …const int32x4_t ve4567 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn4567), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24() 88 …const int32x4_t ve89AB = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn89AB), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24() 89 …const int32x4_t veCDEF = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnCDEF), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24() 90 …const int32x4_t veGHIJ = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnGHIJ), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24() 91 …const int32x4_t veKLMN = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnKLMN), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24() 299 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24() 387 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24()
|
D | neonfma-rr1-lut2048-p1-nr2recps-x24.c | 86 …const int32x4_t ve0123 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn0123), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24() 87 …const int32x4_t ve4567 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn4567), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24() 88 …const int32x4_t ve89AB = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn89AB), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24() 89 …const int32x4_t veCDEF = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnCDEF), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24() 90 …const int32x4_t veGHIJ = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnGHIJ), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24() 91 …const int32x4_t veKLMN = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnKLMN), vmovq_n_s32(INT32_C(… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24() 291 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x7FF)))… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24() 377 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x7FF)))… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24()
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | neonfma-lut64-p2-x20.c | 81 …const int32x4_t ve0123 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn0123), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20() 82 …const int32x4_t ve4567 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn4567), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20() 83 …const int32x4_t ve89AB = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn89AB), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20() 84 …const int32x4_t veCDEF = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnCDEF), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20() 85 …const int32x4_t veGHIJ = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnGHIJ), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20() 233 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20() 309 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20()
|
D | neonfma-lut64-p2-x20-acc5.c | 85 …const int32x4_t ve0123 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn0123), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc5() 86 …const int32x4_t ve4567 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn4567), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc5() 87 …const int32x4_t ve89AB = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn89AB), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc5() 88 …const int32x4_t veCDEF = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnCDEF), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc5() 89 …const int32x4_t veGHIJ = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnGHIJ), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc5() 242 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc5() 318 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc5()
|
D | neonfma-lut64-p2-x20-acc2.c | 82 …const int32x4_t ve0123 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn0123), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc2() 83 …const int32x4_t ve4567 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn4567), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc2() 84 …const int32x4_t ve89AB = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn89AB), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc2() 85 …const int32x4_t veCDEF = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnCDEF), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc2() 86 …const int32x4_t veGHIJ = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnGHIJ), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc2() 236 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc2() 312 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc2()
|
D | neon-lut64-p2-x20-acc2.c | 83 …const int32x4_t ve0123 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn0123), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc2() 84 …const int32x4_t ve4567 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn4567), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc2() 85 …const int32x4_t ve89AB = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn89AB), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc2() 86 …const int32x4_t veCDEF = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnCDEF), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc2() 87 …const int32x4_t veGHIJ = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnGHIJ), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc2() 237 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc2() 313 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc2()
|
D | neon-lut64-p2-x20.c | 82 …const int32x4_t ve0123 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn0123), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20() 83 …const int32x4_t ve4567 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn4567), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20() 84 …const int32x4_t ve89AB = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn89AB), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20() 85 …const int32x4_t veCDEF = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnCDEF), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20() 86 …const int32x4_t veGHIJ = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnGHIJ), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20() 234 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20() 310 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20()
|
D | neon-lut64-p2-x20-acc5.c | 86 …const int32x4_t ve0123 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn0123), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc5() 87 …const int32x4_t ve4567 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn4567), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc5() 88 …const int32x4_t ve89AB = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn89AB), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc5() 89 …const int32x4_t veCDEF = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnCDEF), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc5() 90 …const int32x4_t veGHIJ = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnGHIJ), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc5() 243 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc5() 319 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc5()
|
D | neonfma-lut64-p2-x16-acc4.c | 81 …const int32x4_t ve0123 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn0123), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4() 82 …const int32x4_t ve4567 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn4567), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4() 83 …const int32x4_t ve89AB = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn89AB), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4() 84 …const int32x4_t veCDEF = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnCDEF), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4() 218 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4() 294 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4()
|
D | neon-lut64-p2-x16-acc2.c | 80 …const int32x4_t ve0123 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn0123), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2() 81 …const int32x4_t ve4567 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn4567), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2() 82 …const int32x4_t ve89AB = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn89AB), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2() 83 …const int32x4_t veCDEF = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnCDEF), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2() 215 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2() 291 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2()
|
D | neon-lut64-p2-x16.c | 79 …const int32x4_t ve0123 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn0123), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16() 80 …const int32x4_t ve4567 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn4567), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16() 81 …const int32x4_t ve89AB = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn89AB), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16() 82 …const int32x4_t veCDEF = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnCDEF), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16() 212 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16() 288 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16()
|
D | neonfma-lut64-p2-x16-acc2.c | 79 …const int32x4_t ve0123 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn0123), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2() 80 …const int32x4_t ve4567 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn4567), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2() 81 …const int32x4_t ve89AB = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn89AB), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2() 82 …const int32x4_t veCDEF = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnCDEF), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2() 214 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2() 290 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2()
|
D | neon-lut64-p2-x16-acc4.c | 82 …const int32x4_t ve0123 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn0123), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4() 83 …const int32x4_t ve4567 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn4567), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4() 84 …const int32x4_t ve89AB = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn89AB), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4() 85 …const int32x4_t veCDEF = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnCDEF), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4() 219 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4() 295 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4()
|
D | neonfma-lut64-p2-x16.c | 78 …const int32x4_t ve0123 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn0123), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16() 79 …const int32x4_t ve4567 = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn4567), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16() 80 …const int32x4_t ve89AB = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn89AB), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16() 81 …const int32x4_t veCDEF = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnCDEF), vmovq_n_s32(INT32_C(… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16() 211 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16() 287 …const int32x4_t ve = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vn), vmovq_n_s32(INT32_C(0x3F))),… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16()
|
/external/XNNPACK/src/q8-dwconv/ |
D | up8x9-neon.c | 187 vacc0_lo = vsraq_n_s32(vacc0_lo, vbicq_s32(vacc0_lo, vzero_shift_mask), 31); in xnn_q8_dwconv_ukernel_up8x9__neon() 188 vacc0_hi = vsraq_n_s32(vacc0_hi, vbicq_s32(vacc0_hi, vzero_shift_mask), 31); in xnn_q8_dwconv_ukernel_up8x9__neon() 189 vacc1_lo = vsraq_n_s32(vacc1_lo, vbicq_s32(vacc1_lo, vzero_shift_mask), 31); in xnn_q8_dwconv_ukernel_up8x9__neon() 190 vacc1_hi = vsraq_n_s32(vacc1_hi, vbicq_s32(vacc1_hi, vzero_shift_mask), 31); in xnn_q8_dwconv_ukernel_up8x9__neon() 191 vacc2_lo = vsraq_n_s32(vacc2_lo, vbicq_s32(vacc2_lo, vzero_shift_mask), 31); in xnn_q8_dwconv_ukernel_up8x9__neon() 192 vacc2_hi = vsraq_n_s32(vacc2_hi, vbicq_s32(vacc2_hi, vzero_shift_mask), 31); in xnn_q8_dwconv_ukernel_up8x9__neon() 345 vacc0_lo = vsraq_n_s32(vacc0_lo, vbicq_s32(vacc0_lo, vzero_shift_mask), 31); in xnn_q8_dwconv_ukernel_up8x9__neon() 346 vacc0_hi = vsraq_n_s32(vacc0_hi, vbicq_s32(vacc0_hi, vzero_shift_mask), 31); in xnn_q8_dwconv_ukernel_up8x9__neon() 347 vacc1_lo = vsraq_n_s32(vacc1_lo, vbicq_s32(vacc1_lo, vzero_shift_mask), 31); in xnn_q8_dwconv_ukernel_up8x9__neon() 348 vacc1_hi = vsraq_n_s32(vacc1_hi, vbicq_s32(vacc1_hi, vzero_shift_mask), 31); in xnn_q8_dwconv_ukernel_up8x9__neon() [all …]
|