/external/XNNPACK/src/f32-sigmoid/gen/ |
D | neonfma-rr1-p5-nr2fma-x8.c | 75 const float32x4_t vd4567 = vaddq_f32(ve4567, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8() local 78 float32x4_t vr4567 = vrecpeq_f32(vd4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8() 81 vr4567 = vfmaq_f32(vr4567, vr4567, vfmsq_f32(vone, vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8() 84 vr4567 = vfmaq_f32(vr4567, vr4567, vfmsq_f32(vone, vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8()
|
D | neon-rr2-p5-nr2recps-x8.c | 79 const float32x4_t vd4567 = vaddq_f32(ve4567, vone); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8() local 82 float32x4_t vr4567 = vrecpeq_f32(vd4567); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8() 85 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8() 88 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8()
|
D | neonfma-rr1-p5-nr2recps-x8.c | 75 const float32x4_t vd4567 = vaddq_f32(ve4567, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8() local 78 float32x4_t vr4567 = vrecpeq_f32(vd4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8() 81 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8() 84 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8()
|
D | neonfma-rr1-p5-nr1recps1fma-x8.c | 75 const float32x4_t vd4567 = vaddq_f32(ve4567, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8() local 78 float32x4_t vr4567 = vrecpeq_f32(vd4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8() 81 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8() 84 vr4567 = vfmaq_f32(vr4567, vr4567, vfmsq_f32(vone, vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8()
|
D | neonfma-rr1-lut64-p2-nr2recps-x8.c | 88 const float32x4_t vd4567 = vaddq_f32(vy4567, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8() local 91 float32x4_t vr4567 = vrecpeq_f32(vd4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8() 94 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8() 97 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8()
|
D | neon-rr2-lut64-p2-nr2recps-x8.c | 92 const float32x4_t vd4567 = vaddq_f32(vy4567, vone); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x8() local 95 float32x4_t vr4567 = vrecpeq_f32(vd4567); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x8() 98 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x8() 101 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x8()
|
D | neon-rr2-p5-nr2recps-x12.c | 92 const float32x4_t vd4567 = vaddq_f32(ve4567, vone); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() local 96 float32x4_t vr4567 = vrecpeq_f32(vd4567); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 100 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 104 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12()
|
D | neonfma-rr1-p5-nr2recps-x12.c | 87 const float32x4_t vd4567 = vaddq_f32(ve4567, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() local 91 float32x4_t vr4567 = vrecpeq_f32(vd4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() 95 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() 99 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12()
|
D | neon-rr2-lut2048-p1-nr2recps-x8.c | 88 const float32x4_t vd4567 = vaddq_f32(vy4567, vone); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8() local 91 float32x4_t vr4567 = vrecpeq_f32(vd4567); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8() 94 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8() 97 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8()
|
D | neonfma-rr1-p5-nr2fma-x12.c | 87 const float32x4_t vd4567 = vaddq_f32(ve4567, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12() local 91 float32x4_t vr4567 = vrecpeq_f32(vd4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12() 95 vr4567 = vfmaq_f32(vr4567, vr4567, vfmsq_f32(vone, vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12() 99 vr4567 = vfmaq_f32(vr4567, vr4567, vfmsq_f32(vone, vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12()
|
D | neonfma-rr1-lut64-p2-nr1recps1fma-x8.c | 88 const float32x4_t vd4567 = vaddq_f32(vy4567, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8() local 91 float32x4_t vr4567 = vrecpeq_f32(vd4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8() 94 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8() 97 vr4567 = vfmaq_f32(vr4567, vr4567, vfmsq_f32(vone, vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8()
|
D | neonfma-rr1-lut2048-p1-nr2recps-x8.c | 84 const float32x4_t vd4567 = vaddq_f32(vy4567, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8() local 87 float32x4_t vr4567 = vrecpeq_f32(vd4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8() 90 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8() 93 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8()
|
D | neonfma-rr1-lut2048-p1-nr2fma-x8.c | 84 const float32x4_t vd4567 = vaddq_f32(vy4567, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8() local 87 float32x4_t vr4567 = vrecpeq_f32(vd4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8() 90 vr4567 = vfmaq_f32(vr4567, vr4567, vfmsq_f32(vone, vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8() 93 vr4567 = vfmaq_f32(vr4567, vr4567, vfmsq_f32(vone, vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8()
|
D | neonfma-rr1-lut2048-p1-nr1recps1fma-x8.c | 84 const float32x4_t vd4567 = vaddq_f32(vy4567, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8() local 87 float32x4_t vr4567 = vrecpeq_f32(vd4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8() 90 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8() 93 vr4567 = vfmaq_f32(vr4567, vr4567, vfmsq_f32(vone, vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8()
|
D | neonfma-rr1-lut64-p2-nr2fma-x8.c | 88 const float32x4_t vd4567 = vaddq_f32(vy4567, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x8() local 91 float32x4_t vr4567 = vrecpeq_f32(vd4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x8() 94 vr4567 = vfmaq_f32(vr4567, vr4567, vfmsq_f32(vone, vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x8() 97 vr4567 = vfmaq_f32(vr4567, vr4567, vfmsq_f32(vone, vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x8()
|
D | neonfma-rr1-p5-nr1recps1fma-x12.c | 87 const float32x4_t vd4567 = vaddq_f32(ve4567, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12() local 91 float32x4_t vr4567 = vrecpeq_f32(vd4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12() 95 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12() 99 vr4567 = vfmaq_f32(vr4567, vr4567, vfmsq_f32(vone, vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12()
|
D | neonfma-rr1-lut2048-p1-nr2recps-x12.c | 101 const float32x4_t vd4567 = vaddq_f32(vy4567, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12() local 105 float32x4_t vr4567 = vrecpeq_f32(vd4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12() 109 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12() 113 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12()
|
D | neon-rr2-p5-nr2recps-x16.c | 105 const float32x4_t vd4567 = vaddq_f32(ve4567, vone); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() local 110 float32x4_t vr4567 = vrecpeq_f32(vd4567); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() 115 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() 120 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16()
|
D | neonfma-rr1-lut2048-p1-nr2fma-x12.c | 101 const float32x4_t vd4567 = vaddq_f32(vy4567, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12() local 105 float32x4_t vr4567 = vrecpeq_f32(vd4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12() 109 vr4567 = vfmaq_f32(vr4567, vr4567, vfmsq_f32(vone, vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12() 113 vr4567 = vfmaq_f32(vr4567, vr4567, vfmsq_f32(vone, vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12()
|
D | neonfma-rr1-lut2048-p1-nr1recps1fma-x12.c | 101 const float32x4_t vd4567 = vaddq_f32(vy4567, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12() local 105 float32x4_t vr4567 = vrecpeq_f32(vd4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12() 109 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12() 113 vr4567 = vfmaq_f32(vr4567, vr4567, vfmsq_f32(vone, vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12()
|
D | neonfma-rr1-lut64-p2-nr2recps-x12.c | 106 const float32x4_t vd4567 = vaddq_f32(vy4567, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x12() local 110 float32x4_t vr4567 = vrecpeq_f32(vd4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x12() 114 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x12() 118 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x12()
|
D | neonfma-rr1-p5-nr2fma-x16.c | 99 const float32x4_t vd4567 = vaddq_f32(ve4567, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16() local 104 float32x4_t vr4567 = vrecpeq_f32(vd4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16() 109 vr4567 = vfmaq_f32(vr4567, vr4567, vfmsq_f32(vone, vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16() 114 vr4567 = vfmaq_f32(vr4567, vr4567, vfmsq_f32(vone, vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16()
|
D | neon-rr2-lut2048-p1-nr2recps-x12.c | 106 const float32x4_t vd4567 = vaddq_f32(vy4567, vone); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12() local 110 float32x4_t vr4567 = vrecpeq_f32(vd4567); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12() 114 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12() 118 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12()
|
D | neonfma-rr1-p5-nr2recps-x16.c | 99 const float32x4_t vd4567 = vaddq_f32(ve4567, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16() local 104 float32x4_t vr4567 = vrecpeq_f32(vd4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16() 109 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16() 114 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16()
|
/external/XNNPACK/src/f32-ibilinear/gen/ |
D | neonfma-c8.c | 73 const float32x4_t vd4567 = vsubq_f32(vb4567, vt4567); in xnn_f32_ibilinear_ukernel__neonfma_c8() local 77 const float32x4_t vo4567 = vfmaq_f32(vt4567, vd4567, valphav); in xnn_f32_ibilinear_ukernel__neonfma_c8() 80 const float32x4_t vo4567 = vfmaq_lane_f32(vt4567, vd4567, valphahv, 1); in xnn_f32_ibilinear_ukernel__neonfma_c8()
|