/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | neon-lut64-p2-x8-acc2.c | 82 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2() local 87 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2() 93 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2()
|
D | neonfma-lut64-p2-x8-acc2.c | 81 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2() local 86 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2() 92 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2()
|
D | neon-lut64-p2-x8.c | 81 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8() local 86 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8() 92 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8()
|
D | neonfma-lut64-p2-x8.c | 80 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8() local 85 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8() 91 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8()
|
D | neonfma-lut64-p2-x12-acc3.c | 86 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc3() local 94 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc3() 102 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc3()
|
D | neon-lut64-p2-x12-acc3.c | 87 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc3() local 95 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc3() 103 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc3()
|
D | neon-lut64-p2-x12-acc2.c | 86 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc2() local 94 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc2() 102 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc2()
|
D | neon-lut64-p2-x12.c | 85 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12() local 93 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12() 101 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12()
|
D | neonfma-lut64-p2-x12-acc2.c | 85 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc2() local 93 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc2() 101 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc2()
|
D | neonfma-lut64-p2-x12.c | 84 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12() local 92 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12() 100 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12()
|
D | neonfma-lut64-p2-x16-acc4.c | 91 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4() local 102 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4() 112 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4()
|
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | neonfma-rr1-lut64-p2-div-x8.c | 85 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8() local 87 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8() 93 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8()
|
D | neonfma-rr1-lut2048-p1-div-x8.c | 85 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8() local 87 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8() 93 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8()
|
D | neonfma-rr1-lut2048-p1-nr1recps1fma-x8.c | 85 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8() local 87 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8() 93 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8()
|
D | neonfma-rr1-lut2048-p1-nr2recps-x8.c | 85 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8() local 87 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8() 93 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8()
|
D | neonfma-rr1-lut64-p2-nr1recps1fma-x8.c | 85 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8() local 87 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8() 93 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8()
|
D | neonfma-rr1-lut64-p2-nr2recps-x8.c | 85 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8() local 87 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8() 93 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8()
|
D | neonfma-rr1-lut2048-p1-nr2fma-x8.c | 85 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8() local 87 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8() 93 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8()
|
D | neon-rr2-lut2048-p1-nr2recps-x8.c | 87 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8() local 89 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8() 95 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8()
|
D | neonfma-rr1-lut2048-p1-div-x12.c | 90 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12() local 92 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12() 102 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12()
|
D | neon-rr2-lut64-p2-nr2recps-x8.c | 87 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x8() local 89 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x8() 95 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x8()
|
D | neonfma-rr1-lut64-p2-div-x12.c | 90 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12() local 92 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12() 102 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12()
|
D | neonfma-rr1-lut64-p2-nr2fma-x8.c | 85 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x8() local 87 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x8() 93 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x8()
|
D | neonfma-rr1-lut2048-p1-nr1recps1fma-x12.c | 90 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12() local 92 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12() 102 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12()
|
D | neonfma-rr1-lut2048-p1-nr2recps-x12.c | 90 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12() local 92 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12() 102 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12()
|