Home
last modified time | relevance | path

Searched refs:vl_lo (Results 1 – 25 of 97) sorted by relevance

1234

/external/XNNPACK/src/f32-sigmoid/gen/
Dneonfma-rr1-lut2048-p1-div-x4.c77 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x4() local
79 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x4()
81 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x4()
154 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x4() local
156 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x4()
158 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x4()
Dneonfma-rr1-lut64-p2-div-x4.c77 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x4() local
79 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x4()
81 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x4()
156 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x4() local
158 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x4()
160 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x4()
Dneonfma-rr1-lut2048-p1-nr1recps1fma-x4.c77 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4() local
79 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4()
81 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4()
163 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4() local
165 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4()
167 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4()
Dneonfma-rr1-lut2048-p1-nr2recps-x4.c77 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4() local
79 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4()
81 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4()
163 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4() local
165 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4()
167 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4()
Dneonfma-rr1-lut64-p2-nr1recps1fma-x4.c77 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4() local
79 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4()
81 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4()
165 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4() local
167 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4()
169 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4()
Dneonfma-rr1-lut2048-p1-nr2fma-x4.c77 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4() local
79 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4()
81 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4()
163 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4() local
165 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4()
167 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4()
Dneon-rr2-lut2048-p1-nr2recps-x4.c79 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4() local
81 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4()
83 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4()
167 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4() local
169 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4()
171 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4()
Dneonfma-rr1-lut64-p2-nr2recps-x4.c77 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4() local
79 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4()
81 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4()
165 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4() local
167 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4()
169 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4()
Dneon-rr2-lut64-p2-nr2recps-x4.c79 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4() local
81 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4()
83 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4()
169 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4() local
171 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4()
173 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4()
Dneonfma-rr1-lut64-p2-nr2fma-x4.c77 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4() local
79 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4()
81 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4()
165 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4() local
167 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4()
169 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4()
Dneonfma-rr1-lut64-p2-div-x8.c185 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8() local
187 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8()
189 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8()
264 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8() local
266 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8()
268 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8()
Dneonfma-rr1-lut2048-p1-div-x8.c181 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8() local
183 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8()
185 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8()
258 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8() local
260 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8()
262 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8()
Dneonfma-rr1-lut2048-p1-nr1recps1fma-x8.c193 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8() local
195 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8()
197 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8()
279 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8() local
281 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8()
283 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8()
Dneonfma-rr1-lut2048-p1-nr2recps-x8.c193 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8() local
195 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8()
197 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8()
279 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8() local
281 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8()
283 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8()
Dneonfma-rr1-lut64-p2-nr1recps1fma-x8.c197 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8() local
199 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8()
201 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8()
285 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8() local
287 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8()
289 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8()
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
Dneon-lut64-p2-x4.c76 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4() local
78 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4()
80 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4()
152 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4() local
154 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4()
156 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4()
Dneonfma-lut64-p2-x4.c75 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4() local
77 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4()
79 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4()
151 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4() local
153 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4()
155 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4()
Dneon-lut64-p2-x8-acc2.c177 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2() local
179 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2()
181 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2()
253 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2() local
255 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2()
257 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2()
Dneonfma-lut64-p2-x8-acc2.c176 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2() local
178 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2()
180 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2()
252 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2() local
254 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2()
256 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2()
Dneon-lut64-p2-x8.c174 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8() local
176 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8()
178 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8()
250 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8() local
252 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8()
254 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8()
Dneonfma-lut64-p2-x8.c173 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8() local
175 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8()
177 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8()
249 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8() local
251 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8()
253 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8()
/external/XNNPACK/src/math/
Dsigmoid-neonfma-rr1-lut64-p2-div.c74 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_math_f32_sigmoid__neonfma_rr1_lut64_p2_div() local
76 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_math_f32_sigmoid__neonfma_rr1_lut64_p2_div()
78 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_math_f32_sigmoid__neonfma_rr1_lut64_p2_div()
Dsigmoid-neonfma-rr2-lut64-p2-div.c75 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_math_f32_sigmoid__neonfma_rr2_lut64_p2_div() local
77 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_math_f32_sigmoid__neonfma_rr2_lut64_p2_div()
79 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_math_f32_sigmoid__neonfma_rr2_lut64_p2_div()
Dsigmoid-neon-rr1-lut64-p2-nr2recps.c74 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_math_f32_sigmoid__neon_rr1_lut64_p2_nr2recps() local
76 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_math_f32_sigmoid__neon_rr1_lut64_p2_nr2recps()
78 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_math_f32_sigmoid__neon_rr1_lut64_p2_nr2recps()
Dsigmoid-neonfma-rr1-lut64-p2-nr2recps.c74 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_math_f32_sigmoid__neonfma_rr1_lut64_p2_nr2recps() local
76 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_math_f32_sigmoid__neonfma_rr1_lut64_p2_nr2recps()
78 const float32x4_t vl = vcombine_f32(vl_lo, vl_hi); in xnn_math_f32_sigmoid__neonfma_rr1_lut64_p2_nr2recps()

1234