/external/XNNPACK/src/math/ |
D | expminus-neonfma-lut2048-p1.c | 575 const uint64_t vidx23 = vgetq_lane_u64(vidx, 1); in xnn_math_f32_expminus__neonfma_lut2048_p1() local 577 float32x2_t vl23 = vld1_dup_f32(&exp2_k_over_2048_table[(uint32_t) vidx23]); in xnn_math_f32_expminus__neonfma_lut2048_p1() 579 vl23 = vld1_lane_f32(&exp2_k_over_2048_table[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_math_f32_expminus__neonfma_lut2048_p1()
|
D | expminus-neonfma-lut64-p2.c | 79 const uint64_t vidx23 = vgetq_lane_u64(vidx, 1); in xnn_math_f32_expminus__neonfma_lut64_p2() local 81 float32x2_t vl23 = vld1_dup_f32(&exp2_k_over_64_table[(uint32_t) vidx23]); in xnn_math_f32_expminus__neonfma_lut64_p2() 83 vl23 = vld1_lane_f32(&exp2_k_over_64_table[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_math_f32_expminus__neonfma_lut64_p2()
|
D | sigmoid-neonfma-rr1-lut2048-p1-div.c | 73 const uint64_t vidx23 = vgetq_lane_u64(vidx, 1); in xnn_math_f32_sigmoid__neonfma_rr1_lut2048_p1_div() local 75 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx23]); in xnn_math_f32_sigmoid__neonfma_rr1_lut2048_p1_div() 77 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_math_f32_sigmoid__neonfma_rr1_lut2048_p1_div()
|
D | sigmoid-neonfma-rr1-lut2048-p1-nr2recps.c | 73 const uint64_t vidx23 = vgetq_lane_u64(vidx, 1); in xnn_math_f32_sigmoid__neonfma_rr1_lut2048_p1_nr2recps() local 75 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx23]); in xnn_math_f32_sigmoid__neonfma_rr1_lut2048_p1_nr2recps() 77 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_math_f32_sigmoid__neonfma_rr1_lut2048_p1_nr2recps()
|
D | sigmoid-neonfma-rr1-lut2048-p1-nr2fma.c | 73 const uint64_t vidx23 = vgetq_lane_u64(vidx, 1); in xnn_math_f32_sigmoid__neonfma_rr1_lut2048_p1_nr2fma() local 75 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx23]); in xnn_math_f32_sigmoid__neonfma_rr1_lut2048_p1_nr2fma() 77 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_math_f32_sigmoid__neonfma_rr1_lut2048_p1_nr2fma()
|
D | sigmoid-neonfma-rr1-lut2048-p1-nr1recps1fma.c | 73 const uint64_t vidx23 = vgetq_lane_u64(vidx, 1); in xnn_math_f32_sigmoid__neonfma_rr1_lut2048_p1_nr1recps1fma() local 75 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx23]); in xnn_math_f32_sigmoid__neonfma_rr1_lut2048_p1_nr1recps1fma() 77 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_math_f32_sigmoid__neonfma_rr1_lut2048_p1_nr1recps1fma()
|
D | sigmoid-neonfma-rr2-lut2048-p1-div.c | 74 const uint64_t vidx23 = vgetq_lane_u64(vidx, 1); in xnn_math_f32_sigmoid__neonfma_rr2_lut2048_p1_div() local 76 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx23]); in xnn_math_f32_sigmoid__neonfma_rr2_lut2048_p1_div() 78 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_math_f32_sigmoid__neonfma_rr2_lut2048_p1_div()
|
D | sigmoid-neon-rr1-lut2048-p1-nr2recps.c | 73 const uint64_t vidx23 = vgetq_lane_u64(vidx, 1); in xnn_math_f32_sigmoid__neon_rr1_lut2048_p1_nr2recps() local 75 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx23]); in xnn_math_f32_sigmoid__neon_rr1_lut2048_p1_nr2recps() 77 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_math_f32_sigmoid__neon_rr1_lut2048_p1_nr2recps()
|
D | sigmoid-neonfma-rr2-lut2048-p1-nr2fma.c | 74 const uint64_t vidx23 = vgetq_lane_u64(vidx, 1); in xnn_math_f32_sigmoid__neonfma_rr2_lut2048_p1_nr2fma() local 76 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx23]); in xnn_math_f32_sigmoid__neonfma_rr2_lut2048_p1_nr2fma() 78 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_math_f32_sigmoid__neonfma_rr2_lut2048_p1_nr2fma()
|
D | sigmoid-neon-rr2-lut2048-p1-nr2recps.c | 75 const uint64_t vidx23 = vgetq_lane_u64(vidx, 1); in xnn_math_f32_sigmoid__neon_rr2_lut2048_p1_nr2recps() local 77 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx23]); in xnn_math_f32_sigmoid__neon_rr2_lut2048_p1_nr2recps() 79 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_math_f32_sigmoid__neon_rr2_lut2048_p1_nr2recps()
|
D | sigmoid-neonfma-rr2-lut2048-p1-nr1recps1fma.c | 74 const uint64_t vidx23 = vgetq_lane_u64(vidx, 1); in xnn_math_f32_sigmoid__neonfma_rr2_lut2048_p1_nr1recps1fma() local 76 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx23]); in xnn_math_f32_sigmoid__neonfma_rr2_lut2048_p1_nr1recps1fma() 78 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_math_f32_sigmoid__neonfma_rr2_lut2048_p1_nr1recps1fma()
|
D | sigmoid-neonfma-rr2-lut2048-p1-nr2recps.c | 74 const uint64_t vidx23 = vgetq_lane_u64(vidx, 1); in xnn_math_f32_sigmoid__neonfma_rr2_lut2048_p1_nr2recps() local 76 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx23]); in xnn_math_f32_sigmoid__neonfma_rr2_lut2048_p1_nr2recps() 78 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_math_f32_sigmoid__neonfma_rr2_lut2048_p1_nr2recps()
|
D | exp-neonfma-lut64-p2.c | 87 const uint64_t vidx23 = vgetq_lane_u64(vidx, 1); in xnn_math_f32_exp__neonfma_lut64_p2() local 89 float32x2_t vl23 = vld1_dup_f32(&exp2_table[(uint32_t) vidx23]); in xnn_math_f32_exp__neonfma_lut64_p2() 91 vl23 = vld1_lane_f32(&exp2_table[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_math_f32_exp__neonfma_lut64_p2()
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | neon-lut64-p2-x8-acc2.c | 80 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2() local 86 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2() 91 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2()
|
D | neonfma-lut64-p2-x8-acc2.c | 79 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2() local 85 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2() 90 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2()
|
D | neon-lut64-p2-x8.c | 79 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8() local 85 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8() 90 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8()
|
D | neonfma-lut64-p2-x8.c | 78 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8() local 84 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8() 89 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8()
|
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | neonfma-rr1-lut64-p2-div-x8.c | 82 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8() local 84 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8() 91 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8()
|
D | neonfma-rr1-lut2048-p1-div-x8.c | 82 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8() local 84 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8() 91 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8()
|
D | neonfma-rr1-lut2048-p1-nr1recps1fma-x8.c | 82 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8() local 84 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8() 91 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8()
|
D | neonfma-rr1-lut2048-p1-nr2recps-x8.c | 82 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8() local 84 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8() 91 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8()
|
D | neonfma-rr1-lut64-p2-nr1recps1fma-x8.c | 82 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8() local 84 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8() 91 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8()
|
D | neonfma-rr1-lut64-p2-nr2recps-x8.c | 82 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8() local 84 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8() 91 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8()
|
D | neonfma-rr1-lut2048-p1-nr2fma-x8.c | 82 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8() local 84 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8() 91 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8()
|
D | neon-rr2-lut2048-p1-nr2recps-x8.c | 84 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8() local 86 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8() 93 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8()
|