/external/XNNPACK/src/f16-velu/gen/ |
D | velu-neonfp16arith-rr1-p3-x8.c |
    27  …const float16x8_t vprescale = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr1_p3.pr…  in xnn_f16_velu_ukernel__neonfp16arith_rr1_p3_x8()
    28  …const float16x8_t vsat_cutoff = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr1_p3.…  in xnn_f16_velu_ukernel__neonfp16arith_rr1_p3_x8()
    29  …const float16x8_t vmagic_bias = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr1_p3.…  in xnn_f16_velu_ukernel__neonfp16arith_rr1_p3_x8()
    30  …const float16x8_t vlog2e = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr1_p3.log2e…  in xnn_f16_velu_ukernel__neonfp16arith_rr1_p3_x8()
    31  …const float16x8_t vminus_ln2 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr1_p3.m…  in xnn_f16_velu_ukernel__neonfp16arith_rr1_p3_x8()
    32  const float16x8_t vc3 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr1_p3.c3));  in xnn_f16_velu_ukernel__neonfp16arith_rr1_p3_x8()
    33  const float16x8_t vc2 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr1_p3.c2));  in xnn_f16_velu_ukernel__neonfp16arith_rr1_p3_x8()
    34  …const float16x8_t vminus_alpha = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr1_p3…  in xnn_f16_velu_ukernel__neonfp16arith_rr1_p3_x8()
    35  …const float16x8_t vbeta = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr1_p3.beta));  in xnn_f16_velu_ukernel__neonfp16arith_rr1_p3_x8()
|
D | velu-neonfp16arith-rr1-p3-x16.c |
    27  …const float16x8_t vprescale = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr1_p3.pr…  in xnn_f16_velu_ukernel__neonfp16arith_rr1_p3_x16()
    28  …const float16x8_t vsat_cutoff = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr1_p3.…  in xnn_f16_velu_ukernel__neonfp16arith_rr1_p3_x16()
    29  …const float16x8_t vmagic_bias = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr1_p3.…  in xnn_f16_velu_ukernel__neonfp16arith_rr1_p3_x16()
    30  …const float16x8_t vlog2e = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr1_p3.log2e…  in xnn_f16_velu_ukernel__neonfp16arith_rr1_p3_x16()
    31  …const float16x8_t vminus_ln2 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr1_p3.m…  in xnn_f16_velu_ukernel__neonfp16arith_rr1_p3_x16()
    32  const float16x8_t vc3 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr1_p3.c3));  in xnn_f16_velu_ukernel__neonfp16arith_rr1_p3_x16()
    33  const float16x8_t vc2 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr1_p3.c2));  in xnn_f16_velu_ukernel__neonfp16arith_rr1_p3_x16()
    34  …const float16x8_t vminus_alpha = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr1_p3…  in xnn_f16_velu_ukernel__neonfp16arith_rr1_p3_x16()
    35  …const float16x8_t vbeta = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr1_p3.beta));  in xnn_f16_velu_ukernel__neonfp16arith_rr1_p3_x16()
|
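All of the ELU entries above follow the same load idiom: each half-precision constant is stored as a raw uint16_t bit pattern, broadcast across a 128-bit register with vld1q_dup_u16, and then bit-cast to float16x8_t with vreinterpretq_f16_u16 (no numeric conversion takes place). Below is a minimal, self-contained sketch of that pattern, assuming an ARMv8.2-A target with FP16 arithmetic; the struct, constant values, and scale_f16 function are illustrative stand-ins, not XNNPACK's actual params layout or kernel body.

#include <arm_neon.h>   // needs ARMv8.2-A FP16 arithmetic (e.g. -march=armv8.2-a+fp16)
#include <stddef.h>
#include <stdint.h>

// Hypothetical parameter block: FP16 constants kept as raw IEEE binary16 bit patterns.
struct elu_like_params {
  uint16_t prescale;   // e.g. 0x3C00, the binary16 encoding of 1.0
  uint16_t beta;
};

static float16x8_t broadcast_f16(const uint16_t* p) {
  // Load one 16-bit value, duplicate it into all 8 lanes, then reinterpret the
  // uint16x8_t bit pattern as float16x8_t.
  return vreinterpretq_f16_u16(vld1q_dup_u16(p));
}

void scale_f16(const struct elu_like_params* params, const __fp16* x, __fp16* y, size_t n) {
  const float16x8_t vprescale = broadcast_f16(&params->prescale);
  // Main loop over full vectors of 8 halves; remainder handling omitted for brevity.
  for (; n >= 8; n -= 8) {
    const float16x8_t vx = vld1q_f16(x); x += 8;
    vst1q_f16(y, vmulq_f16(vx, vprescale)); y += 8;
  }
}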
/external/XNNPACK/src/f16-vsigmoid/gen/ |
D | vsigmoid-neonfp16arith-rr2-p2-div-x8.c |
    26  …const float16x8_t vmagic_bias = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x8()
    27  …const float16x8_t vminus_log2e = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x8()
    28  …const float16x8_t vln2_hi = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.ln2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x8()
    29  …const float16x8_t vln2_lo = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.ln2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x8()
    30  const float16x8_t vc2 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c2));  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x8()
    31  const float16x8_t vc1 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c1));  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x8()
    33  …const float16x8_t vdenorm_cutoff = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x8()
|
D | vsigmoid-neonfp16arith-rr2-p2-nr1fma-x8.c |
    26  …const float16x8_t vmagic_bias = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x8()
    27  …const float16x8_t vminus_log2e = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x8()
    28  …const float16x8_t vln2_hi = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.ln2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x8()
    29  …const float16x8_t vln2_lo = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.ln2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x8()
    30  const float16x8_t vc2 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c2));  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x8()
    31  const float16x8_t vc1 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c1));  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x8()
    33  …const float16x8_t vdenorm_cutoff = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x8()
|
D | vsigmoid-neonfp16arith-rr2-p2-nr1recps-x8.c |
    26  …const float16x8_t vmagic_bias = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x8()
    27  …const float16x8_t vminus_log2e = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x8()
    28  …const float16x8_t vln2_hi = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.ln2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x8()
    29  …const float16x8_t vln2_lo = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.ln2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x8()
    30  const float16x8_t vc2 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c2));  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x8()
    31  const float16x8_t vc1 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c1));  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x8()
    33  …const float16x8_t vdenorm_cutoff = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x8()
|
D | vsigmoid-neonfp16arith-rr2-p2-div-x16.c |
    26  …const float16x8_t vmagic_bias = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x16()
    27  …const float16x8_t vminus_log2e = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x16()
    28  …const float16x8_t vln2_hi = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.ln2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x16()
    29  …const float16x8_t vln2_lo = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.ln2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x16()
    30  const float16x8_t vc2 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c2));  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x16()
    31  const float16x8_t vc1 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c1));  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x16()
    33  …const float16x8_t vdenorm_cutoff = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x16()
|
D | vsigmoid-neonfp16arith-rr2-p2-nr1recps-x16.c |
    26  …const float16x8_t vmagic_bias = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x16()
    27  …const float16x8_t vminus_log2e = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x16()
    28  …const float16x8_t vln2_hi = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.ln2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x16()
    29  …const float16x8_t vln2_lo = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.ln2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x16()
    30  const float16x8_t vc2 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c2));  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x16()
    31  const float16x8_t vc1 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c1));  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x16()
    33  …const float16x8_t vdenorm_cutoff = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x16()
|
D | vsigmoid-neonfp16arith-rr2-p2-nr1fma-x16.c |
    26  …const float16x8_t vmagic_bias = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x16()
    27  …const float16x8_t vminus_log2e = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x16()
    28  …const float16x8_t vln2_hi = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.ln2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x16()
    29  …const float16x8_t vln2_lo = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.ln2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x16()
    30  const float16x8_t vc2 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c2));  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x16()
    31  const float16x8_t vc1 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c1));  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x16()
    33  …const float16x8_t vdenorm_cutoff = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x16()
|
D | vsigmoid-neonfp16arith-rr2-p2-div-x24.c |
    26  …const float16x8_t vmagic_bias = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x24()
    27  …const float16x8_t vminus_log2e = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x24()
    28  …const float16x8_t vln2_hi = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.ln2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x24()
    29  …const float16x8_t vln2_lo = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.ln2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x24()
    30  const float16x8_t vc2 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c2));  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x24()
    31  const float16x8_t vc1 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c1));  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x24()
    33  …const float16x8_t vdenorm_cutoff = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x24()
|
D | vsigmoid-neonfp16arith-rr2-p2-nr1recps-x24.c |
    26  …const float16x8_t vmagic_bias = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x24()
    27  …const float16x8_t vminus_log2e = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x24()
    28  …const float16x8_t vln2_hi = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.ln2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x24()
    29  …const float16x8_t vln2_lo = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.ln2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x24()
    30  const float16x8_t vc2 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c2));  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x24()
    31  const float16x8_t vc1 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c1));  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x24()
    33  …const float16x8_t vdenorm_cutoff = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x24()
|
D | vsigmoid-neonfp16arith-rr2-p2-nr1fma-x24.c |
    26  …const float16x8_t vmagic_bias = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x24()
    27  …const float16x8_t vminus_log2e = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x24()
    28  …const float16x8_t vln2_hi = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.ln2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x24()
    29  …const float16x8_t vln2_lo = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.ln2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x24()
    30  const float16x8_t vc2 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c2));  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x24()
    31  const float16x8_t vc1 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c1));  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x24()
    33  …const float16x8_t vdenorm_cutoff = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x24()
|
D | vsigmoid-neonfp16arith-rr2-p2-div-x32.c |
    26  …const float16x8_t vmagic_bias = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x32()
    27  …const float16x8_t vminus_log2e = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x32()
    28  …const float16x8_t vln2_hi = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.ln2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x32()
    29  …const float16x8_t vln2_lo = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.ln2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x32()
    30  const float16x8_t vc2 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c2));  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x32()
    31  const float16x8_t vc1 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c1));  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x32()
    33  …const float16x8_t vdenorm_cutoff = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x32()
|
D | vsigmoid-neonfp16arith-rr2-p2-nr1fma-x32.c |
    26  …const float16x8_t vmagic_bias = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x32()
    27  …const float16x8_t vminus_log2e = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x32()
    28  …const float16x8_t vln2_hi = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.ln2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x32()
    29  …const float16x8_t vln2_lo = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.ln2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x32()
    30  const float16x8_t vc2 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c2));  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x32()
    31  const float16x8_t vc1 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c1));  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x32()
    33  …const float16x8_t vdenorm_cutoff = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x32()
|
D | vsigmoid-neonfp16arith-rr2-p2-div-x40.c |
    26  …const float16x8_t vmagic_bias = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x40()
    27  …const float16x8_t vminus_log2e = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x40()
    28  …const float16x8_t vln2_hi = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.ln2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x40()
    29  …const float16x8_t vln2_lo = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.ln2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x40()
    30  const float16x8_t vc2 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c2));  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x40()
    31  const float16x8_t vc1 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c1));  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x40()
    33  …const float16x8_t vdenorm_cutoff = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x40()
|
D | vsigmoid-neonfp16arith-rr2-p2-nr1recps-x32.c |
    26  …const float16x8_t vmagic_bias = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x32()
    27  …const float16x8_t vminus_log2e = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x32()
    28  …const float16x8_t vln2_hi = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.ln2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x32()
    29  …const float16x8_t vln2_lo = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.ln2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x32()
    30  const float16x8_t vc2 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c2));  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x32()
    31  const float16x8_t vc1 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c1));  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x32()
    33  …const float16x8_t vdenorm_cutoff = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_…  in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x32()
|
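The sigmoid kernels above load ln2 split into high and low halves (ln2_hi, ln2_lo) alongside -log2(e) and a magic bias. Constants split this way are normally used for a two-step (Cody-Waite style) range reduction so the subtraction loses less precision in half floats. The sketch below is only an illustration of how such constants are typically combined, assuming ARMv8.2-A FP16 arithmetic; it is not the exact body of the XNNPACK vsigmoid kernels.

#include <arm_neon.h>   // ARMv8.2-A FP16 arithmetic assumed

// Illustrative range reduction: extract n = round(z * -log2(e)) with the
// magic-bias trick, then form the reduced argument t = z + n*ln2 in two pieces.
static float16x8_t reduce_arg(float16x8_t vz,
                              float16x8_t vminus_log2e,
                              float16x8_t vmagic_bias,
                              float16x8_t vln2_hi,
                              float16x8_t vln2_lo) {
  float16x8_t vn = vfmaq_f16(vmagic_bias, vz, vminus_log2e);  // z*(-log2e) + bias: low bits hold round(z*-log2e)
  vn = vsubq_f16(vn, vmagic_bias);                            // remove the bias, keeping the rounded value
  float16x8_t vt = vfmaq_f16(vz, vn, vln2_hi);                // t = z + n*ln2_hi
  vt = vfmaq_f16(vt, vn, vln2_lo);                            // t += n*ln2_lo (low-order correction)
  return vt;                                                  // reduced argument fed to the c2/c1 polynomial
}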
/external/XNNPACK/src/f16-velu/ |
D | neonfp16arith-rr1-p3.c.in |
    26  …const float16x8_t vprescale = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr1_p3.pr…
    27  …const float16x8_t vsat_cutoff = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr1_p3.…
    28  …const float16x8_t vmagic_bias = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr1_p3.…
    29  …const float16x8_t vlog2e = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr1_p3.log2e…
    30  …const float16x8_t vminus_ln2 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr1_p3.m…
    31  const float16x8_t vc3 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr1_p3.c3));
    32  const float16x8_t vc2 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr1_p3.c2));
    33  …const float16x8_t vminus_alpha = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr1_p3…
    34  …const float16x8_t vbeta = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr1_p3.beta));
|
/external/XNNPACK/src/f16-raddstoreexpminusmax/gen/ |
D | neonfp16arith-rr2-p2-x32-acc2.c |
    29  …const float16x8_t vlog2e = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.log2e…  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32_acc2()
    30  …const float16x8_t vmagic_bias = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.…  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32_acc2()
    31  …const float16x8_t vminus_ln2_hi = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p…  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32_acc2()
    32  …const float16x8_t vminus_ln2_lo = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p…  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32_acc2()
    33  const float16x8_t vc2 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c2));  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32_acc2()
    34  const float16x8_t vc1 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c1));  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32_acc2()
    35  …const float16x8_t vdenorm_cutoff = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_…  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32_acc2()
|
D | neonfp16arith-rr2-p2-x32.c |
    29  …const float16x8_t vlog2e = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.log2e…  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32()
    30  …const float16x8_t vmagic_bias = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.…  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32()
    31  …const float16x8_t vminus_ln2_hi = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p…  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32()
    32  …const float16x8_t vminus_ln2_lo = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p…  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32()
    33  const float16x8_t vc2 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c2));  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32()
    34  const float16x8_t vc1 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c1));  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32()
    35  …const float16x8_t vdenorm_cutoff = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_…  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32()
|
D | neonfp16arith-rr2-p2-x40-acc2.c |
    29  …const float16x8_t vlog2e = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.log2e…  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40_acc2()
    30  …const float16x8_t vmagic_bias = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.…  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40_acc2()
    31  …const float16x8_t vminus_ln2_hi = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p…  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40_acc2()
    32  …const float16x8_t vminus_ln2_lo = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p…  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40_acc2()
    33  const float16x8_t vc2 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c2));  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40_acc2()
    34  const float16x8_t vc1 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c1));  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40_acc2()
    35  …const float16x8_t vdenorm_cutoff = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_…  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40_acc2()
|
D | neonfp16arith-rr2-p2-x32-acc4.c |
    29  …const float16x8_t vlog2e = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.log2e…  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32_acc4()
    30  …const float16x8_t vmagic_bias = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.…  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32_acc4()
    31  …const float16x8_t vminus_ln2_hi = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p…  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32_acc4()
    32  …const float16x8_t vminus_ln2_lo = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p…  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32_acc4()
    33  const float16x8_t vc2 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c2));  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32_acc4()
    34  const float16x8_t vc1 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c1));  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32_acc4()
    35  …const float16x8_t vdenorm_cutoff = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_…  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32_acc4()
|
D | neonfp16arith-rr2-p2-x40.c |
    29  …const float16x8_t vlog2e = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.log2e…  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40()
    30  …const float16x8_t vmagic_bias = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.…  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40()
    31  …const float16x8_t vminus_ln2_hi = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p…  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40()
    32  …const float16x8_t vminus_ln2_lo = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p…  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40()
    33  const float16x8_t vc2 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c2));  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40()
    34  const float16x8_t vc1 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c1));  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40()
    35  …const float16x8_t vdenorm_cutoff = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_…  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40()
|
D | neonfp16arith-rr2-p2-x48.c |
    29  …const float16x8_t vlog2e = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.log2e…  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48()
    30  …const float16x8_t vmagic_bias = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.…  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48()
    31  …const float16x8_t vminus_ln2_hi = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p…  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48()
    32  …const float16x8_t vminus_ln2_lo = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p…  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48()
    33  const float16x8_t vc2 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c2));  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48()
    34  const float16x8_t vc1 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c1));  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48()
    35  …const float16x8_t vdenorm_cutoff = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_…  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48()
|
D | neonfp16arith-rr2-p2-x40-acc5.c |
    29  …const float16x8_t vlog2e = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.log2e…  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40_acc5()
    30  …const float16x8_t vmagic_bias = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.…  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40_acc5()
    31  …const float16x8_t vminus_ln2_hi = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p…  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40_acc5()
    32  …const float16x8_t vminus_ln2_lo = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p…  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40_acc5()
    33  const float16x8_t vc2 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c2));  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40_acc5()
    34  const float16x8_t vc1 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c1));  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40_acc5()
    35  …const float16x8_t vdenorm_cutoff = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_…  in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40_acc5()
|
/external/XNNPACK/src/f16-raddstoreexpminusmax/ |
D | neonfp16arith-rr2-p2.c.in |
    29  …const float16x8_t vlog2e = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.log2e…
    30  …const float16x8_t vmagic_bias = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.…
    31  …const float16x8_t vminus_ln2_hi = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p…
    32  …const float16x8_t vminus_ln2_lo = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p…
    33  const float16x8_t vc2 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c2));
    34  const float16x8_t vc1 = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_p2.c1));
    35  …const float16x8_t vdenorm_cutoff = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neonfp16arith_rr2_…
|
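Both the generated raddstoreexpminusmax kernels and the neonfp16arith-rr2-p2.c.in template read every constant through vld1q_dup_u16 on a uint16_t field rather than loading a half-precision value directly. A plausible reason is that the parameter blocks keep FP16 constants as raw IEEE binary16 bit patterns, so the params headers build even with compilers that have no native half-precision type. The struct below is a hypothetical illustration of such a layout with the field names seen in the listing; it is not the actual XNNPACK params definition, and the commented values are examples only.

#include <stdint.h>

// Hypothetical parameter block; each field holds the raw binary16 encoding.
struct f16_expminus_rr2_p2_params {
  uint16_t log2e;           // e.g. 0x3DC5 ~ 1.4424, log2(e) rounded to binary16
  uint16_t magic_bias;      // large shifter used to round n to an integer
  uint16_t minus_ln2_hi;    // high part of -ln(2)
  uint16_t minus_ln2_lo;    // low-order correction term of -ln(2)
  uint16_t c2;              // degree-2 polynomial coefficient
  uint16_t c1;              // degree-1 polynomial coefficient
  uint16_t denorm_cutoff;   // inputs below this flush the result toward zero
};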
/external/XNNPACK/src/f16-vhswish/gen/ |
D | vhswish-neonfp16arith-x8.c |
    30  const float16x8_t vsixth = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neon.sixth));  in xnn_f16_vhswish_ukernel__neonfp16arith_x8()
    31  const float16x8_t vthree = vreinterpretq_f16_u16(vld1q_dup_u16(&params->neon.three));  in xnn_f16_vhswish_ukernel__neonfp16arith_x8()
    32  const int16x8_t vsix = vreinterpretq_s16_u16(vld1q_dup_u16(&params->neon.six));  in xnn_f16_vhswish_ukernel__neonfp16arith_x8()
|
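The vhswish entry differs from the others in that vsixth and vthree are reinterpreted as float16x8_t while vsix is reinterpreted as int16x8_t, presumably so the upper clamp can be done with integer min/max: for non-negative IEEE binary16 values, the ordering of the bit patterns viewed as signed 16-bit integers matches the floating-point ordering. The function below is an illustrative hard-swish step built from constants shaped like the ones in the listing, assuming ARMv8.2-A FP16 arithmetic; it is not the exact XNNPACK kernel body.

#include <arm_neon.h>   // ARMv8.2-A FP16 arithmetic assumed

// hswish(x) = x * clamp(x + 3, 0, 6) / 6, with the clamp done on bit patterns.
static float16x8_t hswish_f16(float16x8_t vx,
                              float16x8_t vsixth,   // 1/6 as fp16
                              float16x8_t vthree,   // 3.0 as fp16
                              int16x8_t vsix) {     // bit pattern of 6.0 as fp16
  float16x8_t vacc = vaddq_f16(vx, vthree);               // x + 3
  vx = vmulq_f16(vx, vsixth);                              // x / 6
  int16x8_t vbits = vreinterpretq_s16_f16(vacc);
  vbits = vmaxq_s16(vbits, vdupq_n_s16(0));                // clamp below at +0.0
  vbits = vminq_s16(vbits, vsix);                          // clamp above at 6.0
  return vmulq_f16(vx, vreinterpretq_f16_s16(vbits));      // (x/6) * clamp(x+3, 0, 6)
}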