/external/XNNPACK/src/f32-sigmoid/gen/ |
D | neonfma-rr1-p5-nr1recps1fma-x20.c | 140 float32x4_t vdGHIJ = vaddq_f32(veGHIJ, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x20() local 149 float32x4_t vrGHIJ = vrecpeq_f32(vdGHIJ); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x20() 155 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x20() 161 vrGHIJ = vfmaq_f32(vrGHIJ, vrGHIJ, vfmsq_f32(vone, vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x20()
|
D | neonfma-rr1-p5-nr2recps-x20.c | 140 float32x4_t vdGHIJ = vaddq_f32(veGHIJ, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x20() local 149 float32x4_t vrGHIJ = vrecpeq_f32(vdGHIJ); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x20() 155 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x20() 161 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x20()
|
D | neon-rr2-p5-nr2recps-x20.c | 149 float32x4_t vdGHIJ = vaddq_f32(veGHIJ, vone); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() local 158 float32x4_t vrGHIJ = vrecpeq_f32(vdGHIJ); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() 164 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() 170 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20()
|
D | neonfma-rr1-p5-nr2fma-x20.c | 140 float32x4_t vdGHIJ = vaddq_f32(veGHIJ, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x20() local 149 float32x4_t vrGHIJ = vrecpeq_f32(vdGHIJ); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x20() 155 vrGHIJ = vfmaq_f32(vrGHIJ, vrGHIJ, vfmsq_f32(vone, vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x20() 161 vrGHIJ = vfmaq_f32(vrGHIJ, vrGHIJ, vfmsq_f32(vone, vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x20()
|
D | neonfma-rr1-p5-nr1recps1fma-x24.c | 152 float32x4_t vdGHIJ = vaddq_f32(veGHIJ, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x24() local 162 float32x4_t vrGHIJ = vrecpeq_f32(vdGHIJ); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x24() 169 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x24() 176 vrGHIJ = vfmaq_f32(vrGHIJ, vrGHIJ, vfmsq_f32(vone, vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x24()
|
D | neonfma-rr1-p5-nr2recps-x24.c | 152 float32x4_t vdGHIJ = vaddq_f32(veGHIJ, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24() local 162 float32x4_t vrGHIJ = vrecpeq_f32(vdGHIJ); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24() 169 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24() 176 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24()
|
D | neonfma-rr1-p5-nr2fma-x24.c | 152 float32x4_t vdGHIJ = vaddq_f32(veGHIJ, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x24() local 162 float32x4_t vrGHIJ = vrecpeq_f32(vdGHIJ); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x24() 169 vrGHIJ = vfmaq_f32(vrGHIJ, vrGHIJ, vfmsq_f32(vone, vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x24() 176 vrGHIJ = vfmaq_f32(vrGHIJ, vrGHIJ, vfmsq_f32(vone, vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x24()
|
D | neon-rr2-p5-nr2recps-x24.c | 162 float32x4_t vdGHIJ = vaddq_f32(veGHIJ, vone); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() local 172 float32x4_t vrGHIJ = vrecpeq_f32(vdGHIJ); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() 179 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() 186 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24()
|
D | neonfma-rr1-lut64-p2-nr1recps1fma-x20.c | 184 const float32x4_t vdGHIJ = vaddq_f32(vyGHIJ, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x20() local 193 float32x4_t vrGHIJ = vrecpeq_f32(vdGHIJ); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x20() 199 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x20() 205 vrGHIJ = vfmaq_f32(vrGHIJ, vrGHIJ, vfmsq_f32(vone, vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x20()
|
D | neonfma-rr1-lut64-p2-nr2fma-x20.c | 184 const float32x4_t vdGHIJ = vaddq_f32(vyGHIJ, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x20() local 193 float32x4_t vrGHIJ = vrecpeq_f32(vdGHIJ); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x20() 199 vrGHIJ = vfmaq_f32(vrGHIJ, vrGHIJ, vfmsq_f32(vone, vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x20() 205 vrGHIJ = vfmaq_f32(vrGHIJ, vrGHIJ, vfmsq_f32(vone, vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x20()
|
D | neonfma-rr1-lut2048-p1-nr2recps-x20.c | 177 const float32x4_t vdGHIJ = vaddq_f32(vyGHIJ, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20() local 186 float32x4_t vrGHIJ = vrecpeq_f32(vdGHIJ); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20() 192 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20() 198 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20()
|
D | neonfma-rr1-lut2048-p1-nr2fma-x20.c | 177 const float32x4_t vdGHIJ = vaddq_f32(vyGHIJ, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x20() local 186 float32x4_t vrGHIJ = vrecpeq_f32(vdGHIJ); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x20() 192 vrGHIJ = vfmaq_f32(vrGHIJ, vrGHIJ, vfmsq_f32(vone, vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x20() 198 vrGHIJ = vfmaq_f32(vrGHIJ, vrGHIJ, vfmsq_f32(vone, vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x20()
|
D | neonfma-rr1-lut2048-p1-nr1recps1fma-x20.c | 177 const float32x4_t vdGHIJ = vaddq_f32(vyGHIJ, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x20() local 186 float32x4_t vrGHIJ = vrecpeq_f32(vdGHIJ); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x20() 192 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x20() 198 vrGHIJ = vfmaq_f32(vrGHIJ, vrGHIJ, vfmsq_f32(vone, vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x20()
|
D | neon-rr2-lut64-p2-nr2recps-x20.c | 193 const float32x4_t vdGHIJ = vaddq_f32(vyGHIJ, vone); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x20() local 202 float32x4_t vrGHIJ = vrecpeq_f32(vdGHIJ); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x20() 208 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x20() 214 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x20()
|
D | neon-rr2-lut2048-p1-nr2recps-x20.c | 186 const float32x4_t vdGHIJ = vaddq_f32(vyGHIJ, vone); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x20() local 195 float32x4_t vrGHIJ = vrecpeq_f32(vdGHIJ); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x20() 201 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x20() 207 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x20()
|
D | neonfma-rr1-lut64-p2-nr2recps-x20.c | 184 const float32x4_t vdGHIJ = vaddq_f32(vyGHIJ, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x20() local 193 float32x4_t vrGHIJ = vrecpeq_f32(vdGHIJ); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x20() 199 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x20() 205 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x20()
|
D | neonfma-rr1-lut64-p2-nr1recps1fma-x24.c | 202 const float32x4_t vdGHIJ = vaddq_f32(vyGHIJ, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24() local 212 float32x4_t vrGHIJ = vrecpeq_f32(vdGHIJ); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24() 219 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24() 226 vrGHIJ = vfmaq_f32(vrGHIJ, vrGHIJ, vfmsq_f32(vone, vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24()
|
D | neonfma-rr1-lut2048-p1-nr2fma-x24.c | 194 const float32x4_t vdGHIJ = vaddq_f32(vyGHIJ, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24() local 204 float32x4_t vrGHIJ = vrecpeq_f32(vdGHIJ); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24() 211 vrGHIJ = vfmaq_f32(vrGHIJ, vrGHIJ, vfmsq_f32(vone, vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24() 218 vrGHIJ = vfmaq_f32(vrGHIJ, vrGHIJ, vfmsq_f32(vone, vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24()
|
D | neonfma-rr1-lut64-p2-nr2fma-x24.c | 202 const float32x4_t vdGHIJ = vaddq_f32(vyGHIJ, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24() local 212 float32x4_t vrGHIJ = vrecpeq_f32(vdGHIJ); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24() 219 vrGHIJ = vfmaq_f32(vrGHIJ, vrGHIJ, vfmsq_f32(vone, vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24() 226 vrGHIJ = vfmaq_f32(vrGHIJ, vrGHIJ, vfmsq_f32(vone, vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24()
|
D | neonfma-rr1-lut2048-p1-nr2recps-x24.c | 194 const float32x4_t vdGHIJ = vaddq_f32(vyGHIJ, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24() local 204 float32x4_t vrGHIJ = vrecpeq_f32(vdGHIJ); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24() 211 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24() 218 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24()
|
D | neonfma-rr1-lut64-p2-nr2recps-x24.c | 202 const float32x4_t vdGHIJ = vaddq_f32(vyGHIJ, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() local 212 float32x4_t vrGHIJ = vrecpeq_f32(vdGHIJ); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() 219 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() 226 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24()
|
D | neon-rr2-lut64-p2-nr2recps-x24.c | 212 const float32x4_t vdGHIJ = vaddq_f32(vyGHIJ, vone); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24() local 222 float32x4_t vrGHIJ = vrecpeq_f32(vdGHIJ); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24() 229 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24() 236 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24()
|
D | neon-rr2-lut2048-p1-nr2recps-x24.c | 204 const float32x4_t vdGHIJ = vaddq_f32(vyGHIJ, vone); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24() local 214 float32x4_t vrGHIJ = vrecpeq_f32(vdGHIJ); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24() 221 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24() 228 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24()
|
D | neonfma-rr1-lut2048-p1-nr1recps1fma-x24.c | 194 const float32x4_t vdGHIJ = vaddq_f32(vyGHIJ, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24() local 204 float32x4_t vrGHIJ = vrecpeq_f32(vdGHIJ); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24() 211 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24() 218 vrGHIJ = vfmaq_f32(vrGHIJ, vrGHIJ, vfmsq_f32(vone, vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24()
|
D | neonfma-rr1-p5-div-x20.c | 140 float32x4_t vdGHIJ = vaddq_f32(veGHIJ, vone); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20() local 147 float32x4_t vfGHIJ = vdivq_f32(veGHIJ, vdGHIJ); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20()
|