/external/XNNPACK/src/f32-sigmoid/gen/ |
D | neonfma-rr1-p5-nr2fma-x12.c | 114 float32x4_t vd89AB = vaddq_f32(ve89AB, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12() local 121 float32x4_t vr89AB = vrecpeq_f32(vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12() 125 vr89AB = vfmaq_f32(vr89AB, vr89AB, vfmsq_f32(vone, vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12() 129 vr89AB = vfmaq_f32(vr89AB, vr89AB, vfmsq_f32(vone, vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12()
|
D | neonfma-rr1-p5-nr2recps-x12.c | 114 float32x4_t vd89AB = vaddq_f32(ve89AB, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() local 121 float32x4_t vr89AB = vrecpeq_f32(vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() 125 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() 129 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12()
|
D | neon-rr2-p5-nr2recps-x12.c | 121 float32x4_t vd89AB = vaddq_f32(ve89AB, vone); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() local 128 float32x4_t vr89AB = vrecpeq_f32(vd89AB); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 132 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 136 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12()
|
D | neonfma-rr1-p5-nr1recps1fma-x12.c | 114 float32x4_t vd89AB = vaddq_f32(ve89AB, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12() local 121 float32x4_t vr89AB = vrecpeq_f32(vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12() 125 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12() 129 vr89AB = vfmaq_f32(vr89AB, vr89AB, vfmsq_f32(vone, vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12()
|
D | neonfma-rr1-p5-nr2recps-x16.c | 126 float32x4_t vd89AB = vaddq_f32(ve89AB, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16() local 134 float32x4_t vr89AB = vrecpeq_f32(vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16() 139 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16() 144 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16()
|
D | neon-rr2-p5-nr2recps-x16.c | 134 float32x4_t vd89AB = vaddq_f32(ve89AB, vone); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() local 142 float32x4_t vr89AB = vrecpeq_f32(vd89AB); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() 147 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() 152 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16()
|
D | neonfma-rr1-lut2048-p1-nr1recps1fma-x12.c | 141 const float32x4_t vd89AB = vaddq_f32(vy89AB, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12() local 148 float32x4_t vr89AB = vrecpeq_f32(vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12() 152 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12() 156 vr89AB = vfmaq_f32(vr89AB, vr89AB, vfmsq_f32(vone, vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12()
|
D | neonfma-rr1-p5-nr1recps1fma-x16.c | 126 float32x4_t vd89AB = vaddq_f32(ve89AB, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x16() local 134 float32x4_t vr89AB = vrecpeq_f32(vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x16() 139 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x16() 144 vr89AB = vfmaq_f32(vr89AB, vr89AB, vfmsq_f32(vone, vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x16()
|
D | neonfma-rr1-lut2048-p1-nr2recps-x12.c | 141 const float32x4_t vd89AB = vaddq_f32(vy89AB, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12() local 148 float32x4_t vr89AB = vrecpeq_f32(vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12() 152 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12() 156 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12()
|
D | neonfma-rr1-p5-nr2fma-x16.c | 126 float32x4_t vd89AB = vaddq_f32(ve89AB, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16() local 134 float32x4_t vr89AB = vrecpeq_f32(vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16() 139 vr89AB = vfmaq_f32(vr89AB, vr89AB, vfmsq_f32(vone, vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16() 144 vr89AB = vfmaq_f32(vr89AB, vr89AB, vfmsq_f32(vone, vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16()
|
D | neonfma-rr1-lut2048-p1-nr2fma-x12.c | 141 const float32x4_t vd89AB = vaddq_f32(vy89AB, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12() local 148 float32x4_t vr89AB = vrecpeq_f32(vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12() 152 vr89AB = vfmaq_f32(vr89AB, vr89AB, vfmsq_f32(vone, vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12() 156 vr89AB = vfmaq_f32(vr89AB, vr89AB, vfmsq_f32(vone, vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12()
|
D | neonfma-rr1-p5-nr1recps1fma-x20.c | 138 float32x4_t vd89AB = vaddq_f32(ve89AB, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x20() local 147 float32x4_t vr89AB = vrecpeq_f32(vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x20() 153 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x20() 159 vr89AB = vfmaq_f32(vr89AB, vr89AB, vfmsq_f32(vone, vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x20()
|
D | neonfma-rr1-p5-nr2recps-x20.c | 138 float32x4_t vd89AB = vaddq_f32(ve89AB, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x20() local 147 float32x4_t vr89AB = vrecpeq_f32(vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x20() 153 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x20() 159 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x20()
|
D | neonfma-rr1-lut64-p2-nr2fma-x12.c | 146 const float32x4_t vd89AB = vaddq_f32(vy89AB, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x12() local 153 float32x4_t vr89AB = vrecpeq_f32(vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x12() 157 vr89AB = vfmaq_f32(vr89AB, vr89AB, vfmsq_f32(vone, vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x12() 161 vr89AB = vfmaq_f32(vr89AB, vr89AB, vfmsq_f32(vone, vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x12()
|
D | neonfma-rr1-lut64-p2-nr1recps1fma-x12.c | 146 const float32x4_t vd89AB = vaddq_f32(vy89AB, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x12() local 153 float32x4_t vr89AB = vrecpeq_f32(vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x12() 157 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x12() 161 vr89AB = vfmaq_f32(vr89AB, vr89AB, vfmsq_f32(vone, vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x12()
|
D | neon-rr2-lut2048-p1-nr2recps-x12.c | 148 const float32x4_t vd89AB = vaddq_f32(vy89AB, vone); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12() local 155 float32x4_t vr89AB = vrecpeq_f32(vd89AB); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12() 159 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12() 163 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12()
|
D | neonfma-rr1-lut64-p2-nr2recps-x12.c | 146 const float32x4_t vd89AB = vaddq_f32(vy89AB, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x12() local 153 float32x4_t vr89AB = vrecpeq_f32(vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x12() 157 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x12() 161 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x12()
|
D | neon-rr2-lut64-p2-nr2recps-x12.c | 153 const float32x4_t vd89AB = vaddq_f32(vy89AB, vone); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x12() local 160 float32x4_t vr89AB = vrecpeq_f32(vd89AB); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x12() 164 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x12() 168 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x12()
|
D | neon-rr2-p5-nr2recps-x20.c | 147 float32x4_t vd89AB = vaddq_f32(ve89AB, vone); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() local 156 float32x4_t vr89AB = vrecpeq_f32(vd89AB); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() 162 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() 168 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20()
|
D | neonfma-rr1-p5-nr2fma-x20.c | 138 float32x4_t vd89AB = vaddq_f32(ve89AB, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x20() local 147 float32x4_t vr89AB = vrecpeq_f32(vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x20() 153 vr89AB = vfmaq_f32(vr89AB, vr89AB, vfmsq_f32(vone, vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x20() 159 vr89AB = vfmaq_f32(vr89AB, vr89AB, vfmsq_f32(vone, vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x20()
|
D | neonfma-rr1-lut2048-p1-nr2recps-x16.c | 158 const float32x4_t vd89AB = vaddq_f32(vy89AB, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x16() local 166 float32x4_t vr89AB = vrecpeq_f32(vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x16() 171 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x16() 176 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x16()
|
D | neonfma-rr1-lut2048-p1-nr1recps1fma-x16.c | 158 const float32x4_t vd89AB = vaddq_f32(vy89AB, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x16() local 166 float32x4_t vr89AB = vrecpeq_f32(vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x16() 171 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x16() 176 vr89AB = vfmaq_f32(vr89AB, vr89AB, vfmsq_f32(vone, vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x16()
|
D | neon-rr2-lut64-p2-nr2recps-x16.c | 172 const float32x4_t vd89AB = vaddq_f32(vy89AB, vone); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x16() local 180 float32x4_t vr89AB = vrecpeq_f32(vd89AB); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x16() 185 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x16() 190 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x16()
|
D | neonfma-rr1-p5-nr1recps1fma-x24.c | 150 float32x4_t vd89AB = vaddq_f32(ve89AB, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x24() local 160 float32x4_t vr89AB = vrecpeq_f32(vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x24() 167 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x24() 174 vr89AB = vfmaq_f32(vr89AB, vr89AB, vfmsq_f32(vone, vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x24()
|
D | neonfma-rr1-lut64-p2-nr2fma-x16.c | 164 const float32x4_t vd89AB = vaddq_f32(vy89AB, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x16() local 172 float32x4_t vr89AB = vrecpeq_f32(vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x16() 177 vr89AB = vfmaq_f32(vr89AB, vr89AB, vfmsq_f32(vone, vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x16() 182 vr89AB = vfmaq_f32(vr89AB, vr89AB, vfmsq_f32(vone, vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x16()
|