/external/XNNPACK/src/f32-vsigmoid/gen/

vsigmoid-neonfma-rr1-lut64-p2-div-x8.c  (in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8)
    60  float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx67]);  (local)
    66  vl67 = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx67 >> 32)], vl67, 1);
    67  const float32x4_t vl4567 = vcombine_f32(vl45, vl67);

vsigmoid-neonfma-rr1-lut2048-p1-div-x8.c  (in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8)
    59  float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]);  (local)
    65  vl67 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx67 >> 32)], vl67, 1);
    66  const float32x4_t vl4567 = vcombine_f32(vl45, vl67);

vsigmoid-neonfma-rr1-lut2048-p1-nr2recps-x8.c  (in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8)
    59  float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]);  (local)
    65  vl67 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx67 >> 32)], vl67, 1);
    66  const float32x4_t vl4567 = vcombine_f32(vl45, vl67);

vsigmoid-neonfma-rr1-lut64-p2-nr2recps-x8.c  (in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8)
    60  float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx67]);  (local)
    66  vl67 = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx67 >> 32)], vl67, 1);
    67  const float32x4_t vl4567 = vcombine_f32(vl45, vl67);

vsigmoid-neonfma-rr1-lut64-p2-nr1recps1fma-x8.c  (in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8)
    60  float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx67]);  (local)
    66  vl67 = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx67 >> 32)], vl67, 1);
    67  const float32x4_t vl4567 = vcombine_f32(vl45, vl67);

vsigmoid-neonfma-rr1-lut2048-p1-nr1recps1fma-x8.c  (in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8)
    59  float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]);  (local)
    65  vl67 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx67 >> 32)], vl67, 1);
    66  const float32x4_t vl4567 = vcombine_f32(vl45, vl67);

vsigmoid-neon-rr2-lut2048-p1-nr2recps-x8.c  (in xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8)
    60  float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]);  (local)
    66  vl67 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx67 >> 32)], vl67, 1);
    67  const float32x4_t vl4567 = vcombine_f32(vl45, vl67);

vsigmoid-neonfma-rr1-lut2048-p1-nr2fma-x8.c  (in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8)
    59  float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]);  (local)
    65  vl67 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx67 >> 32)], vl67, 1);
    66  const float32x4_t vl4567 = vcombine_f32(vl45, vl67);

vsigmoid-neon-rr2-lut64-p2-nr2recps-x8.c  (in xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x8)
    61  float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx67]);  (local)
    67  vl67 = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx67 >> 32)], vl67, 1);
    68  const float32x4_t vl4567 = vcombine_f32(vl45, vl67);

vsigmoid-neonfma-rr1-lut64-p2-nr2fma-x8.c  (in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x8)
    60  float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx67]);  (local)
    66  vl67 = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx67 >> 32)], vl67, 1);
    67  const float32x4_t vl4567 = vcombine_f32(vl45, vl67);

vsigmoid-neonfma-rr1-lut64-p2-div-x12.c  (in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12)
    65  float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx67]);  (local)
    75  vl67 = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx67 >> 32)], vl67, 1);
    76  const float32x4_t vl4567 = vcombine_f32(vl45, vl67);

vsigmoid-neonfma-rr1-lut2048-p1-div-x12.c  (in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12)
    64  float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]);  (local)
    74  vl67 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx67 >> 32)], vl67, 1);
    75  const float32x4_t vl4567 = vcombine_f32(vl45, vl67);

vsigmoid-neonfma-rr1-lut2048-p1-nr2recps-x12.c  (in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12)
    64  float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]);  (local)
    74  vl67 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx67 >> 32)], vl67, 1);
    75  const float32x4_t vl4567 = vcombine_f32(vl45, vl67);

vsigmoid-neonfma-rr1-lut2048-p1-nr1recps1fma-x12.c  (in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12)
    64  float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]);  (local)
    74  vl67 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx67 >> 32)], vl67, 1);
    75  const float32x4_t vl4567 = vcombine_f32(vl45, vl67);
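Every hit in this group is the same two-step NEON table gather: each uint64_t (vidx45, vidx67) packs two 32-bit LUT indices, vld1_dup_f32 loads the low-index entry into both lanes of a float32x2_t, vld1_lane_f32 patches lane 1 with the high-index entry, and vcombine_f32 joins the halves into a float32x4_t. A minimal self-contained sketch of the pattern follows; the table name, its 8-entry size, and the helper name are illustrative stand-ins, not XNNPACK's:

    #include <arm_neon.h>
    #include <stdint.h>

    /* Hypothetical 8-entry LUT standing in for xnn_table_exp2minus_k_over_64/2048. */
    static const float my_table[8] = {
      1.00f, 1.09f, 1.19f, 1.30f, 1.41f, 1.54f, 1.68f, 1.83f
    };

    /* vidx45 and vidx67 each pack two 32-bit table indices into one uint64_t. */
    static float32x4_t gather4(uint64_t vidx45, uint64_t vidx67) {
      /* Load the low-index entry into both lanes of a 64-bit vector... */
      float32x2_t vl45 = vld1_dup_f32(&my_table[(uint32_t) vidx45]);
      /* ...then overwrite lane 1 with the high-index entry. */
      vl45 = vld1_lane_f32(&my_table[(uint32_t) (vidx45 >> 32)], vl45, 1);
      /* Same two-step load for the second pair of lanes. */
      float32x2_t vl67 = vld1_dup_f32(&my_table[(uint32_t) vidx67]);
      vl67 = vld1_lane_f32(&my_table[(uint32_t) (vidx67 >> 32)], vl67, 1);
      /* Join the halves into one 128-bit vector holding all four entries. */
      return vcombine_f32(vl45, vl67);
    }

The dup-then-lane pairing is more than style: vld1_lane_f32 needs an already-valid source vector, and loading a lane into an uninitialized register would be undefined behavior, so the dup both initializes the register and supplies lane 0 in a single load.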
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/

neonfma-rr1-lut64-p2-x8-acc2.c  (in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x8_acc2)
    63  float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]);  (local)
    69  vl67 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx67 >> 32)], vl67, 1);
    70  const float32x4_t vl4567 = vcombine_f32(vl45, vl67);

neonfma-rr1-lut64-p2-x8.c  (in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x8)
    62  float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]);  (local)
    68  vl67 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx67 >> 32)], vl67, 1);
    69  const float32x4_t vl4567 = vcombine_f32(vl45, vl67);

neon-rr2-lut64-p2-x8-acc2.c  (in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x8_acc2)
    64  float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]);  (local)
    70  vl67 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx67 >> 32)], vl67, 1);
    71  const float32x4_t vl4567 = vcombine_f32(vl45, vl67);

neon-rr2-lut64-p2-x8.c  (in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x8)
    63  float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]);  (local)
    69  vl67 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx67 >> 32)], vl67, 1);
    70  const float32x4_t vl4567 = vcombine_f32(vl45, vl67);

neonfma-rr1-lut64-p2-x12-acc2.c  (in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x12_acc2)
    70  float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]);  (local)
    78  vl67 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx67 >> 32)], vl67, 1);
    79  const float32x4_t vl4567 = vcombine_f32(vl45, vl67);

neonfma-rr1-lut64-p2-x12-acc3.c  (in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x12_acc3)
    71  float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]);  (local)
    79  vl67 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx67 >> 32)], vl67, 1);
    80  const float32x4_t vl4567 = vcombine_f32(vl45, vl67);

neonfma-rr1-lut64-p2-x12.c  (in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x12)
    69  float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]);  (local)
    77  vl67 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx67 >> 32)], vl67, 1);
    78  const float32x4_t vl4567 = vcombine_f32(vl45, vl67);

neon-rr2-lut64-p2-x12.c  (in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x12)
    70  float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]);  (local)
    78  vl67 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx67 >> 32)], vl67, 1);
    79  const float32x4_t vl4567 = vcombine_f32(vl45, vl67);

neon-rr2-lut64-p2-x12-acc2.c  (in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x12_acc2)
    71  float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]);  (local)
    79  vl67 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx67 >> 32)], vl67, 1);
    80  const float32x4_t vl4567 = vcombine_f32(vl45, vl67);
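These raddstoreexpminusmax hits perform the identical float gather, just against xnn_table_exp2_k_over_64. The listing never shows where the packed indices originate; in kernels of this shape they typically come from masking a vector of quantized exponent words and reading it back as two 64-bit lanes. A hedged sketch, assuming a 64-entry table (hence the 0x3F mask) and illustrative names:

    #include <arm_neon.h>
    #include <stdint.h>

    /* Split an int32x4_t of raw index words into two uint64_t values,
       each packing a pair of 32-bit indices into a 64-entry table. */
    static void split_indices(int32x4_t vn4567, uint64_t* vidx45, uint64_t* vidx67) {
      /* Keep only the low 6 bits of each element (valid indices 0..63). */
      const int32x4_t vidx = vandq_s32(vn4567, vmovq_n_s32(INT32_C(0x3F)));
      /* Reinterpret as two uint64 lanes, each carrying a pair of indices. */
      const uint64x2_t vpacked = vreinterpretq_u64_s32(vidx);
      *vidx45 = vgetq_lane_u64(vpacked, 0);
      *vidx67 = vgetq_lane_u64(vpacked, 1);
    }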
/external/XNNPACK/src/f32-velu/gen/

velu-neonfma-rr1-lut16-p3-x8.c  (in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8)
    68  int32x2_t vl67 = vld1_dup_s32((const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx67));  (local)
    70  vl67 = vld1_lane_s32((const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx67 >> 32)), vl67, 1);
    71  const int32x4_t vl4567 = vcombine_s32(vl45, vl67);

velu-neon-rr2-lut16-p3-x8.c  (in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8)
    69  int32x2_t vl67 = vld1_dup_s32((const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx67));  (local)
    71  vl67 = vld1_lane_s32((const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx67 >> 32)), vl67, 1);
    72  const int32x4_t vl4567 = vcombine_s32(vl45, vl67);
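The velu lut16 hits are the same gather with two differences: the loads are integer (vld1_dup_s32 / vld1_lane_s32), presumably because the looked-up word is later merged with the reconstructed exponent as an int32, and the packed values are byte offsets added to the table base through a uintptr_t rather than element indices. A sketch of that variant; the table contents and helper name are illustrative:

    #include <arm_neon.h>
    #include <stdint.h>

    /* Hypothetical stand-in for xnn_table_exp2minus_k_over_16 (contents omitted). */
    static const int32_t some_table[16] = { 0 };

    /* vidx45/vidx67 each pack two 32-bit BYTE offsets into the table. */
    static int32x4_t gather4_s32(uint64_t vidx45, uint64_t vidx67) {
      const uintptr_t base = (uintptr_t) some_table;
      /* Byte offset is added to the base address before the lane loads. */
      int32x2_t vl45 = vld1_dup_s32((const int32_t*) (base + (uint32_t) vidx45));
      vl45 = vld1_lane_s32((const int32_t*) (base + (uint32_t) (vidx45 >> 32)), vl45, 1);
      int32x2_t vl67 = vld1_dup_s32((const int32_t*) (base + (uint32_t) vidx67));
      vl67 = vld1_lane_s32((const int32_t*) (base + (uint32_t) (vidx67 >> 32)), vl67, 1);
      return vcombine_s32(vl45, vl67);
    }

Carrying the indices pre-scaled as byte offsets saves a shift on the scalar path, at the cost of the uintptr_t arithmetic visible in the listing above.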