/external/XNNPACK/src/f32-sigmoid/gen/
D | neonfma-rr1-lut2048-p1-div-x12.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12():
    67  float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]);  (local)
    76  vl89 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx89 >> 32)], vl89, 1);
    78  const float32x4_t vl89AB = vcombine_f32(vl89, vlAB);

D | neonfma-rr1-lut64-p2-div-x12.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12():
    68  float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx89]);  (local)
    77  vl89 = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx89 >> 32)], vl89, 1);
    79  const float32x4_t vl89AB = vcombine_f32(vl89, vlAB);

D | neonfma-rr1-lut2048-p1-nr2recps-x12.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12():
    67  float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]);  (local)
    76  vl89 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx89 >> 32)], vl89, 1);
    78  const float32x4_t vl89AB = vcombine_f32(vl89, vlAB);

D | neonfma-rr1-lut2048-p1-nr2fma-x12.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12():
    67  float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]);  (local)
    76  vl89 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx89 >> 32)], vl89, 1);
    78  const float32x4_t vl89AB = vcombine_f32(vl89, vlAB);

D | neonfma-rr1-lut2048-p1-nr1recps1fma-x12.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12():
    67  float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]);  (local)
    76  vl89 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx89 >> 32)], vl89, 1);
    78  const float32x4_t vl89AB = vcombine_f32(vl89, vlAB);

D | neonfma-rr1-lut64-p2-nr2recps-x12.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x12():
    68  float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx89]);  (local)
    77  vl89 = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx89 >> 32)], vl89, 1);
    79  const float32x4_t vl89AB = vcombine_f32(vl89, vlAB);

D | neon-rr2-lut2048-p1-nr2recps-x12.c | in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12():
    68  float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]);  (local)
    77  vl89 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx89 >> 32)], vl89, 1);
    79  const float32x4_t vl89AB = vcombine_f32(vl89, vlAB);

D | neonfma-rr1-lut64-p2-nr2fma-x12.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x12():
    68  float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx89]);  (local)
    77  vl89 = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx89 >> 32)], vl89, 1);
    79  const float32x4_t vl89AB = vcombine_f32(vl89, vlAB);

D | neonfma-rr1-lut64-p2-nr1recps1fma-x12.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x12():
    68  float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx89]);  (local)
    77  vl89 = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx89 >> 32)], vl89, 1);
    79  const float32x4_t vl89AB = vcombine_f32(vl89, vlAB);

D | neonfma-rr1-lut64-p2-div-x16.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x16():
    73  float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx89]);  (local)
    86  vl89 = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx89 >> 32)], vl89, 1);
    88  const float32x4_t vl89AB = vcombine_f32(vl89, vlAB);

D | neon-rr2-lut64-p2-nr2recps-x12.c | in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x12():
    69  float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx89]);  (local)
    78  vl89 = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx89 >> 32)], vl89, 1);
    80  const float32x4_t vl89AB = vcombine_f32(vl89, vlAB);

D | neonfma-rr1-lut2048-p1-div-x16.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x16():
    72  float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]);  (local)
    85  vl89 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx89 >> 32)], vl89, 1);
    87  const float32x4_t vl89AB = vcombine_f32(vl89, vlAB);

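All of the sigmoid entries above share one idiom: NEON has no gather instruction, so each pair of table lookups is assembled from two scalar loads. The 64-bit value vidx89 packs two 32-bit indices (for lanes 8 and 9 of the batch); vld1_dup_f32 loads the low-index entry into both lanes of a float32x2_t, vld1_lane_f32 then overwrites lane 1 with the high-index entry, and vcombine_f32 merges two such halves into a 128-bit vector. A minimal standalone sketch of the pattern, using a hypothetical 64-entry table in place of xnn_table_exp2minus_k_over_64:

    #include <arm_neon.h>
    #include <stdint.h>

    /* Hypothetical table standing in for xnn_table_exp2minus_k_over_64. */
    extern const float table_exp2minus_k_over_64[64];

    /* Gather two table entries whose 32-bit indices are packed into one
     * uint64_t (low index in bits 0..31, high index in bits 32..63). */
    static inline float32x2_t gather2_f32(uint64_t vidx89) {
      /* Load the low-index entry into both lanes... */
      float32x2_t vl89 = vld1_dup_f32(&table_exp2minus_k_over_64[(uint32_t) vidx89]);
      /* ...then overwrite lane 1 with the high-index entry. */
      vl89 = vld1_lane_f32(&table_exp2minus_k_over_64[(uint32_t) (vidx89 >> 32)], vl89, 1);
      return vl89;
    }

    /* Two 2-lane gathers widen to the full vector seen in the listings:
     *   const float32x4_t vl89AB = vcombine_f32(vl89, vlAB);            */
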
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
D | neonfma-lut64-p2-x12-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc2():
    95  float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]);  (local)
   104  vl89 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx89 >> 32)], vl89, 1);
   106  const float32x4_t vl89AB = vcombine_f32(vl89, vlAB);

D | neon-lut64-p2-x12.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12():
    95  float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]);  (local)
   104  vl89 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx89 >> 32)], vl89, 1);
   106  const float32x4_t vl89AB = vcombine_f32(vl89, vlAB);

D | neonfma-lut64-p2-x12.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12():
    94  float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]);  (local)
   103  vl89 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx89 >> 32)], vl89, 1);
   105  const float32x4_t vl89AB = vcombine_f32(vl89, vlAB);

D | neon-lut64-p2-x12-acc3.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc3():
    97  float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]);  (local)
   106  vl89 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx89 >> 32)], vl89, 1);
   108  const float32x4_t vl89AB = vcombine_f32(vl89, vlAB);

D | neonfma-lut64-p2-x12-acc3.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc3():
    96  float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]);  (local)
   105  vl89 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx89 >> 32)], vl89, 1);
   107  const float32x4_t vl89AB = vcombine_f32(vl89, vlAB);

D | neon-lut64-p2-x12-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc2():
    96  float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]);  (local)
   105  vl89 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx89 >> 32)], vl89, 1);
   107  const float32x4_t vl89AB = vcombine_f32(vl89, vlAB);

D | neonfma-lut64-p2-x16-acc4.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4():
   104  float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]);  (local)
   115  vl89 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx89 >> 32)], vl89, 1);
   117  const float32x4_t vl89AB = vcombine_f32(vl89, vlAB);

D | neon-lut64-p2-x16-acc4.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4():
   105  float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]);  (local)
   116  vl89 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx89 >> 32)], vl89, 1);
   118  const float32x4_t vl89AB = vcombine_f32(vl89, vlAB);

D | neon-lut64-p2-x16.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16():
   102  float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]);  (local)
   113  vl89 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx89 >> 32)], vl89, 1);
   115  const float32x4_t vl89AB = vcombine_f32(vl89, vlAB);

D | neon-lut64-p2-x16-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2():
   103  float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]);  (local)
   114  vl89 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx89 >> 32)], vl89, 1);
   116  const float32x4_t vl89AB = vcombine_f32(vl89, vlAB);

D | neonfma-lut64-p2-x16-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2():
   102  float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]);  (local)
   113  vl89 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx89 >> 32)], vl89, 1);
   115  const float32x4_t vl89AB = vcombine_f32(vl89, vlAB);

D | neonfma-lut64-p2-x16.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16():
   101  float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]);  (local)
   112  vl89 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx89 >> 32)], vl89, 1);
   114  const float32x4_t vl89AB = vcombine_f32(vl89, vlAB);

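The raddstoreexpminusmax kernels perform the identical two-lane gather, only against xnn_table_exp2_k_over_64. The listings do not show where vidx89 comes from; in kernels of this shape it is typically produced by masking the low bits of the rounded exponents and reading the 128-bit index vector out one 64-bit lane at a time. A hedged sketch under that assumption (variable names chosen to match the listings, not copied from the files):

    #include <arm_neon.h>
    #include <stdint.h>

    /* Derive two packed 64-bit index pairs from four rounded exponents
     * vn89AB. Each 32-bit lane keeps only its low 6 bits (n mod 64),
     * matching a 64-entry table; this step is assumed, not shown above. */
    static inline void extract_indices(float32x4_t vn89AB,
                                       uint64_t* vidx89, uint64_t* vidxAB) {
      const int32x4_t vindex_mask = vmovq_n_s32(INT32_C(0x3F));
      const uint64x2_t vidx89AB = vreinterpretq_u64_s32(
          vandq_s32(vreinterpretq_s32_f32(vn89AB), vindex_mask));
      /* Each 64-bit lane now packs two 32-bit indices, ready for the
       * vld1_dup_f32 / vld1_lane_f32 gather shown in the listings. */
      *vidx89 = vgetq_lane_u64(vidx89AB, 0);
      *vidxAB = vgetq_lane_u64(vidx89AB, 1);
    }
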
/external/XNNPACK/src/f32-velu/gen/
D | velu-neonfma-rr1-lut16-p3-x12.c | in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12():
    80  …int32x2_t vl89 = vld1_dup_s32((const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3…  (local)
    82  …vl89 = vld1_lane_s32((const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vid…
    84  const int32x4_t vl89AB = vcombine_s32(vl89, vlAB);

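The velu kernel uses the integer flavor of the same gather: table entries are float bit patterns kept in integer registers and, as the truncated lines suggest, the packed values act as byte offsets added to the table base address rather than element indices. A minimal sketch under that reading, with a hypothetical 16-entry table in place of xnn_table_exp2minus_k_over_16:

    #include <arm_neon.h>
    #include <stdint.h>

    /* Hypothetical table of float bit patterns standing in for
     * xnn_table_exp2minus_k_over_16. */
    extern const uint32_t table_exp2minus_k_over_16[16];

    /* Two-lane gather with byte offsets: each 32-bit half of vidx89 is
     * assumed to be a pre-scaled offset into the table. As on line 84
     * above, two such halves then merge via vcombine_s32. */
    static inline int32x2_t gather2_s32(uint64_t vidx89) {
      int32x2_t vl89 = vld1_dup_s32((const int32_t*)
          ((uintptr_t) table_exp2minus_k_over_16 + (uint32_t) vidx89));
      vl89 = vld1_lane_s32((const int32_t*)
          ((uintptr_t) table_exp2minus_k_over_16 + (uint32_t) (vidx89 >> 32)), vl89, 1);
      return vl89;
    }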