/external/XNNPACK/src/f32-sigmoid/gen/

neonfma-rr1-lut2048-p1-div-x4.c  (in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x4)
   77  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx_lo]);  [local]
   79  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
   81  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);
  154  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx_lo]);  [local]
  156  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
  158  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);

neonfma-rr1-lut64-p2-div-x4.c  (in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x4)
   77  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);  [local]
   79  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
   81  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);
  156  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);  [local]
  158  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
  160  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);

neonfma-rr1-lut2048-p1-nr1recps1fma-x4.c  (in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4)
   77  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx_lo]);  [local]
   79  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
   81  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);
  163  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx_lo]);  [local]
  165  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
  167  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);

neonfma-rr1-lut2048-p1-nr2recps-x4.c  (in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4)
   77  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx_lo]);  [local]
   79  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
   81  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);
  163  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx_lo]);  [local]
  165  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
  167  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);

neonfma-rr1-lut64-p2-nr1recps1fma-x4.c  (in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4)
   77  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);  [local]
   79  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
   81  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);
  165  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);  [local]
  167  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
  169  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);

neonfma-rr1-lut2048-p1-nr2fma-x4.c  (in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4)
   77  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx_lo]);  [local]
   79  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
   81  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);
  163  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx_lo]);  [local]
  165  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
  167  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);

neon-rr2-lut2048-p1-nr2recps-x4.c  (in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4)
   79  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx_lo]);  [local]
   81  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
   83  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);
  167  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx_lo]);  [local]
  169  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
  171  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);

neonfma-rr1-lut64-p2-nr2recps-x4.c  (in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4)
   77  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);  [local]
   79  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
   81  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);
  165  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);  [local]
  167  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
  169  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);

neon-rr2-lut64-p2-nr2recps-x4.c  (in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4)
   79  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);  [local]
   81  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
   83  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);
  169  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);  [local]
  171  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
  173  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);

neonfma-rr1-lut64-p2-nr2fma-x4.c  (in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4)
   77  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);  [local]
   79  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
   81  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);
  165  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);  [local]
  167  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
  169  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);

neonfma-rr1-lut64-p2-div-x8.c  (in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8)
  185  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);  [local]
  187  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
  189  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);
  264  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);  [local]
  266  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
  268  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);

neonfma-rr1-lut2048-p1-div-x8.c  (in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8)
  181  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx_lo]);  [local]
  183  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
  185  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);
  258  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx_lo]);  [local]
  260  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
  262  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);

neonfma-rr1-lut2048-p1-nr1recps1fma-x8.c  (in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8)
  193  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx_lo]);  [local]
  195  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
  197  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);
  279  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx_lo]);  [local]
  281  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
  283  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);

neonfma-rr1-lut2048-p1-nr2recps-x8.c  (in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8)
  193  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx_lo]);  [local]
  195  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
  197  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);
  279  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidx_lo]);  [local]
  281  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
  283  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);

neonfma-rr1-lut64-p2-nr1recps1fma-x8.c  (in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8)
  197  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);  [local]
  199  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
  201  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);
  285  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);  [local]
  287  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
  289  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/

neon-lut64-p2-x4.c  (in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4)
   76  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);  [local]
   78  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
   80  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);
  152  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);  [local]
  154  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
  156  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);

neonfma-lut64-p2-x4.c  (in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4)
   75  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);  [local]
   77  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
   79  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);
  151  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);  [local]
  153  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
  155  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);

neon-lut64-p2-x8-acc2.c  (in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2)
  177  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);  [local]
  179  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
  181  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);
  253  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);  [local]
  255  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
  257  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);

neonfma-lut64-p2-x8-acc2.c  (in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2)
  176  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);  [local]
  178  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
  180  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);
  252  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);  [local]
  254  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
  256  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);

neon-lut64-p2-x8.c  (in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8)
  174  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);  [local]
  176  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
  178  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);
  250  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);  [local]
  252  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
  254  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);

neonfma-lut64-p2-x8.c  (in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8)
  173  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);  [local]
  175  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
  177  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);
  249  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);  [local]
  251  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
  253  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);
/external/XNNPACK/src/math/

sigmoid-neonfma-rr1-lut64-p2-div.c  (in xnn_math_f32_sigmoid__neonfma_rr1_lut64_p2_div)
   74  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);  [local]
   76  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
   78  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);

sigmoid-neonfma-rr2-lut64-p2-div.c  (in xnn_math_f32_sigmoid__neonfma_rr2_lut64_p2_div)
   75  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);  [local]
   77  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
   79  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);

sigmoid-neon-rr1-lut64-p2-nr2recps.c  (in xnn_math_f32_sigmoid__neon_rr1_lut64_p2_nr2recps)
   74  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);  [local]
   76  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
   78  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);

sigmoid-neonfma-rr1-lut64-p2-nr2recps.c  (in xnn_math_f32_sigmoid__neonfma_rr1_lut64_p2_nr2recps)
   74  float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);  [local]
   76  vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
   78  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);
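Every hit above is the same two-lane table gather. A 64-bit lane value (vidx_lo here, with a vidx_hi counterpart that this search does not match) packs two 32-bit LUT indices; vld1_dup_f32 broadcasts the first table entry into both lanes of a float32x2_t, vld1_lane_f32 then overwrites lane 1 with the second entry, and vcombine_f32 joins the lo and hi halves into the float32x4_t vl. The sketch below is a minimal, self-contained illustration of that pattern, not XNNPACK code: kLut and the hard-coded indices are hypothetical stand-ins for xnn_table_exp2_k_over_64 / xnn_table_exp2_k_over_2048 and for the indices the kernels derive during range reduction.

#include <arm_neon.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for xnn_table_exp2_k_over_64 / _2048. */
static const float kLut[4] = { 1.0f, 2.0f, 4.0f, 8.0f };

int main(void) {
  /* Four 32-bit indices viewed as two 64-bit lanes. On a little-endian
   * target the low 32 bits of each uint64_t hold the even-numbered index
   * and the high 32 bits hold the odd-numbered one. */
  const uint32_t idx[4] = { 0, 1, 2, 3 };
  const uint64x2_t vidx = vreinterpretq_u64_u32(vld1q_u32(idx));
  const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0);
  const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1);

  /* The gather pattern from the hits: broadcast entry 0 of each pair,
   * then patch lane 1 with the second entry, then recombine. */
  float32x2_t vl_lo = vld1_dup_f32(&kLut[(uint32_t) vidx_lo]);
  float32x2_t vl_hi = vld1_dup_f32(&kLut[(uint32_t) vidx_hi]);
  vl_lo = vld1_lane_f32(&kLut[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
  vl_hi = vld1_lane_f32(&kLut[(uint32_t) (vidx_hi >> 32)], vl_hi, 1);
  const float32x4_t vl = vcombine_f32(vl_lo, vl_hi);

  float out[4];
  vst1q_f32(out, vl);
  printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  /* 1 2 4 8 */
  return 0;
}

Two scalar-addressed loads per 64-bit half is the usual NEON workaround for the missing vector-gather instruction, and extracting one uint64_t per index pair instead of four separate uint32_t lanes halves the number of lane extractions.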