/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/

D | neonfma-lut64-p2-x20.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20():
    112  float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxGH]);  (local)
    127  vlGH = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidxGH >> 32)], vlGH, 1);
    129  const float32x4_t vlGHIJ = vcombine_f32(vlGH, vlIJ);

D | neonfma-lut64-p2-x20-acc5.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc5():
    116  float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxGH]);  (local)
    131  vlGH = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidxGH >> 32)], vlGH, 1);
    133  const float32x4_t vlGHIJ = vcombine_f32(vlGH, vlIJ);

D | neonfma-lut64-p2-x20-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc2():
    113  float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxGH]);  (local)
    128  vlGH = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidxGH >> 32)], vlGH, 1);
    130  const float32x4_t vlGHIJ = vcombine_f32(vlGH, vlIJ);

D | neon-lut64-p2-x20-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc2():
    114  float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxGH]);  (local)
    129  vlGH = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidxGH >> 32)], vlGH, 1);
    131  const float32x4_t vlGHIJ = vcombine_f32(vlGH, vlIJ);

D | neon-lut64-p2-x20.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20():
    113  float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxGH]);  (local)
    128  vlGH = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidxGH >> 32)], vlGH, 1);
    130  const float32x4_t vlGHIJ = vcombine_f32(vlGH, vlIJ);

D | neon-lut64-p2-x20-acc5.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc5():
    117  float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxGH]);  (local)
    132  vlGH = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidxGH >> 32)], vlGH, 1);
    134  const float32x4_t vlGHIJ = vcombine_f32(vlGH, vlIJ);
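Every entry in this listing touches vlGH with the same three intrinsics: vld1_dup_f32 fills both lanes of a float32x2_t from the table entry selected by the low 32 bits of the packed index vidxGH, vld1_lane_f32 then overwrites lane 1 using the high 32 bits, and vcombine_f32 joins the pair with vlIJ into the 128-bit vlGHIJ. Below is a minimal sketch of that gather idiom, assuming a plain float table; the helper name and signature are hypothetical, and only the three intrinsics come from the listing:

    #include <arm_neon.h>
    #include <stdint.h>

    /* Hypothetical helper, not XNNPACK code: gathers four table entries
     * using two packed 64-bit indices, each holding two 32-bit offsets. */
    static float32x4_t lut_gather4(const float* table,
                                   uint64_t vidx_lo, uint64_t vidx_hi) {
      /* Lane 0 of each half comes from the low 32 bits of its index... */
      float32x2_t vl_lo = vld1_dup_f32(&table[(uint32_t) vidx_lo]);
      float32x2_t vl_hi = vld1_dup_f32(&table[(uint32_t) vidx_hi]);
      /* ...and lane 1 is overwritten from the high 32 bits. */
      vl_lo = vld1_lane_f32(&table[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
      vl_hi = vld1_lane_f32(&table[(uint32_t) (vidx_hi >> 32)], vl_hi, 1);
      /* Join the two halves into one 128-bit vector of table entries. */
      return vcombine_f32(vl_lo, vl_hi);
    }

The sigmoid entries below use the identical idiom; only the table (xnn_table_exp2_k_over_64 vs. xnn_table_exp2_k_over_2048) differs.
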
/external/XNNPACK/src/f32-sigmoid/gen/

D | neonfma-rr1-lut2048-p1-div-x20.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x20():
    114  float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidxGH]);  (local)
    129  vlGH = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidxGH >> 32)], vlGH, 1);
    131  const float32x4_t vlGHIJ = vcombine_f32(vlGH, vlIJ);

D | neonfma-rr1-lut64-p2-div-x20.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x20():
    114  float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxGH]);  (local)
    129  vlGH = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidxGH >> 32)], vlGH, 1);
    131  const float32x4_t vlGHIJ = vcombine_f32(vlGH, vlIJ);

D | neonfma-rr1-lut64-p2-nr1recps1fma-x20.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x20():
    114  float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxGH]);  (local)
    129  vlGH = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidxGH >> 32)], vlGH, 1);
    131  const float32x4_t vlGHIJ = vcombine_f32(vlGH, vlIJ);

D | neonfma-rr1-lut64-p2-nr2fma-x20.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x20():
    114  float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxGH]);  (local)
    129  vlGH = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidxGH >> 32)], vlGH, 1);
    131  const float32x4_t vlGHIJ = vcombine_f32(vlGH, vlIJ);

D | neonfma-rr1-lut2048-p1-nr2recps-x20.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20():
    114  float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidxGH]);  (local)
    129  vlGH = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidxGH >> 32)], vlGH, 1);
    131  const float32x4_t vlGHIJ = vcombine_f32(vlGH, vlIJ);

D | neonfma-rr1-lut64-p2-div-x24.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24():
    119  float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxGH]);  (local)
    138  vlGH = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidxGH >> 32)], vlGH, 1);
    140  const float32x4_t vlGHIJ = vcombine_f32(vlGH, vlIJ);

D | neonfma-rr1-lut2048-p1-nr2fma-x20.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x20():
    114  float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidxGH]);  (local)
    129  vlGH = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidxGH >> 32)], vlGH, 1);
    131  const float32x4_t vlGHIJ = vcombine_f32(vlGH, vlIJ);

D | neonfma-rr1-lut2048-p1-nr1recps1fma-x20.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x20():
    114  float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidxGH]);  (local)
    129  vlGH = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidxGH >> 32)], vlGH, 1);
    131  const float32x4_t vlGHIJ = vcombine_f32(vlGH, vlIJ);

D | neonfma-rr1-lut2048-p1-div-x24.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24():
    119  float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidxGH]);  (local)
    138  vlGH = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidxGH >> 32)], vlGH, 1);
    140  const float32x4_t vlGHIJ = vcombine_f32(vlGH, vlIJ);

D | neon-rr2-lut64-p2-nr2recps-x20.c | in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x20():
    116  float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxGH]);  (local)
    131  vlGH = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidxGH >> 32)], vlGH, 1);
    133  const float32x4_t vlGHIJ = vcombine_f32(vlGH, vlIJ);

D | neon-rr2-lut2048-p1-nr2recps-x20.c | in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x20():
    116  float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidxGH]);  (local)
    131  vlGH = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidxGH >> 32)], vlGH, 1);
    133  const float32x4_t vlGHIJ = vcombine_f32(vlGH, vlIJ);

D | neonfma-rr1-lut64-p2-nr2recps-x20.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x20():
    114  float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxGH]);  (local)
    129  vlGH = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidxGH >> 32)], vlGH, 1);
    131  const float32x4_t vlGHIJ = vcombine_f32(vlGH, vlIJ);

D | neonfma-rr1-lut64-p2-nr1recps1fma-x24.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24():
    119  float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxGH]);  (local)
    138  vlGH = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidxGH >> 32)], vlGH, 1);
    140  const float32x4_t vlGHIJ = vcombine_f32(vlGH, vlIJ);

D | neonfma-rr1-lut2048-p1-nr2fma-x24.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24():
    119  float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidxGH]);  (local)
    138  vlGH = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidxGH >> 32)], vlGH, 1);
    140  const float32x4_t vlGHIJ = vcombine_f32(vlGH, vlIJ);

D | neonfma-rr1-lut64-p2-nr2fma-x24.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24():
    119  float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxGH]);  (local)
    138  vlGH = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidxGH >> 32)], vlGH, 1);
    140  const float32x4_t vlGHIJ = vcombine_f32(vlGH, vlIJ);

D | neonfma-rr1-lut2048-p1-nr2recps-x24.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24():
    119  float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidxGH]);  (local)
    138  vlGH = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidxGH >> 32)], vlGH, 1);
    140  const float32x4_t vlGHIJ = vcombine_f32(vlGH, vlIJ);

D | neonfma-rr1-lut64-p2-nr2recps-x24.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24():
    119  float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxGH]);  (local)
    138  vlGH = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidxGH >> 32)], vlGH, 1);
    140  const float32x4_t vlGHIJ = vcombine_f32(vlGH, vlIJ);

D | neon-rr2-lut64-p2-nr2recps-x24.c | in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24():
    121  float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxGH]);  (local)
    140  vlGH = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidxGH >> 32)], vlGH, 1);
    142  const float32x4_t vlGHIJ = vcombine_f32(vlGH, vlIJ);

D | neon-rr2-lut2048-p1-nr2recps-x24.c | in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24():
    121  float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_2048[(uint32_t) vidxGH]);  (local)
    140  vlGH = vld1_lane_f32(&xnn_table_exp2_k_over_2048[(uint32_t) (vidxGH >> 32)], vlGH, 1);
    142  const float32x4_t vlGHIJ = vcombine_f32(vlGH, vlIJ);
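
The listing shows only how vidxGH is consumed: the (uint32_t) cast reads its low 32 bits and the >> 32 shift reads its high 32 bits, so each 64-bit scalar carries two LUT offsets. How such a pair is produced is not shown above; the following is one plausible construction (mask each 32-bit lane to the table size, then reinterpret the vector as two 64-bit scalars), with all names hypothetical:

    #include <arm_neon.h>
    #include <stdint.h>

    /* Illustrative only: pack two 32-bit LUT offsets per 64-bit scalar.
     * The 0x3F mask assumes a 64-entry table; a 2048-entry table would
     * use 0x7FF instead. */
    static void split_indices(int32x4_t vn_bits,
                              uint64_t* vidx_lo, uint64_t* vidx_hi) {
      const int32x4_t vindex_mask = vmovq_n_s32(INT32_C(0x3F));
      const uint64x2_t vidx =
          vreinterpretq_u64_s32(vandq_s32(vn_bits, vindex_mask));
      *vidx_lo = vgetq_lane_u64(vidx, 0);  /* offsets for lanes 0 and 1 */
      *vidx_hi = vgetq_lane_u64(vidx, 1);  /* offsets for lanes 2 and 3 */
    }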