/external/XNNPACK/src/f32-sigmoid/gen/ |
D | neonfma-rr1-p5-nr2fma-x8.c | 106 float32x4_t vr0123 = vrecpeq_f32(vd0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8() local 109 vr0123 = vfmaq_f32(vr0123, vr0123, vfmsq_f32(vone, vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8() 112 vr0123 = vfmaq_f32(vr0123, vr0123, vfmsq_f32(vone, vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8() 116 float32x4_t vf0123 = vmulq_f32(ve0123, vr0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8()
|
D | neonfma-rr1-p5-nr1recps1fma-x8.c | 106 float32x4_t vr0123 = vrecpeq_f32(vd0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8() local 109 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8() 112 vr0123 = vfmaq_f32(vr0123, vr0123, vfmsq_f32(vone, vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8() 116 float32x4_t vf0123 = vmulq_f32(ve0123, vr0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8()
|
D | neonfma-rr1-p5-nr2fma-x12.c | 119 float32x4_t vr0123 = vrecpeq_f32(vd0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12() local 123 vr0123 = vfmaq_f32(vr0123, vr0123, vfmsq_f32(vone, vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12() 127 vr0123 = vfmaq_f32(vr0123, vr0123, vfmsq_f32(vone, vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12() 132 float32x4_t vf0123 = vmulq_f32(ve0123, vr0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12()
|
D | neonfma-rr1-lut2048-p1-nr2fma-x8.c | 128 float32x4_t vr0123 = vrecpeq_f32(vd0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8() local 131 vr0123 = vfmaq_f32(vr0123, vr0123, vfmsq_f32(vone, vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8() 134 vr0123 = vfmaq_f32(vr0123, vr0123, vfmsq_f32(vone, vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8() 138 float32x4_t vf0123 = vmulq_f32(vy0123, vr0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8()
|
D | neonfma-rr1-p5-nr2recps-x8.c | 106 float32x4_t vr0123 = vrecpeq_f32(vd0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8() local 109 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8() 112 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8() 116 float32x4_t vf0123 = vmulq_f32(ve0123, vr0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8()
|
D | neonfma-rr1-lut64-p2-nr2fma-x8.c | 132 float32x4_t vr0123 = vrecpeq_f32(vd0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x8() local 135 vr0123 = vfmaq_f32(vr0123, vr0123, vfmsq_f32(vone, vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x8() 138 vr0123 = vfmaq_f32(vr0123, vr0123, vfmsq_f32(vone, vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x8() 142 float32x4_t vf0123 = vmulq_f32(vy0123, vr0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x8()
|
D | neon-rr2-p5-nr2recps-x8.c | 112 float32x4_t vr0123 = vrecpeq_f32(vd0123); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8() local 115 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8() 118 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8() 122 float32x4_t vf0123 = vmulq_f32(ve0123, vr0123); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8()
|
D | neonfma-rr1-lut2048-p1-nr1recps1fma-x8.c | 128 float32x4_t vr0123 = vrecpeq_f32(vd0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8() local 131 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8() 134 vr0123 = vfmaq_f32(vr0123, vr0123, vfmsq_f32(vone, vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8() 138 float32x4_t vf0123 = vmulq_f32(vy0123, vr0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8()
|
D | neonfma-rr1-lut64-p2-nr1recps1fma-x8.c | 132 float32x4_t vr0123 = vrecpeq_f32(vd0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8() local 135 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8() 138 vr0123 = vfmaq_f32(vr0123, vr0123, vfmsq_f32(vone, vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8() 142 float32x4_t vf0123 = vmulq_f32(vy0123, vr0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8()
|
D | neonfma-rr1-p5-nr1recps1fma-x12.c | 119 float32x4_t vr0123 = vrecpeq_f32(vd0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12() local 123 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12() 127 vr0123 = vfmaq_f32(vr0123, vr0123, vfmsq_f32(vone, vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12() 132 float32x4_t vf0123 = vmulq_f32(ve0123, vr0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12()
|
D | neonfma-rr1-p5-nr2fma-x16.c | 132 float32x4_t vr0123 = vrecpeq_f32(vd0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16() local 137 vr0123 = vfmaq_f32(vr0123, vr0123, vfmsq_f32(vone, vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16() 142 vr0123 = vfmaq_f32(vr0123, vr0123, vfmsq_f32(vone, vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16() 148 float32x4_t vf0123 = vmulq_f32(ve0123, vr0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16()
|
D | neonfma-rr1-lut2048-p1-nr2fma-x12.c | 146 float32x4_t vr0123 = vrecpeq_f32(vd0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12() local 150 vr0123 = vfmaq_f32(vr0123, vr0123, vfmsq_f32(vone, vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12() 154 vr0123 = vfmaq_f32(vr0123, vr0123, vfmsq_f32(vone, vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12() 159 float32x4_t vf0123 = vmulq_f32(vy0123, vr0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12()
|
D | neonfma-rr1-lut64-p2-nr2fma-x12.c | 151 float32x4_t vr0123 = vrecpeq_f32(vd0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x12() local 155 vr0123 = vfmaq_f32(vr0123, vr0123, vfmsq_f32(vone, vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x12() 159 vr0123 = vfmaq_f32(vr0123, vr0123, vfmsq_f32(vone, vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x12() 164 float32x4_t vf0123 = vmulq_f32(vy0123, vr0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x12()
|
D | neonfma-rr1-lut2048-p1-nr2recps-x8.c | 128 float32x4_t vr0123 = vrecpeq_f32(vd0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8() local 131 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8() 134 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8() 138 float32x4_t vf0123 = vmulq_f32(vy0123, vr0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8()
|
D | neonfma-rr1-p5-nr2recps-x12.c | 119 float32x4_t vr0123 = vrecpeq_f32(vd0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() local 123 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() 127 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() 132 float32x4_t vf0123 = vmulq_f32(ve0123, vr0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12()
|
D | neon-rr2-p5-nr2recps-x12.c | 126 float32x4_t vr0123 = vrecpeq_f32(vd0123); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() local 130 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 134 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 139 float32x4_t vf0123 = vmulq_f32(ve0123, vr0123); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12()
|
D | neonfma-rr1-lut64-p2-nr2recps-x8.c | 132 float32x4_t vr0123 = vrecpeq_f32(vd0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8() local 135 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8() 138 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8() 142 float32x4_t vf0123 = vmulq_f32(vy0123, vr0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8()
|
D | neon-rr2-lut2048-p1-nr2recps-x8.c | 134 float32x4_t vr0123 = vrecpeq_f32(vd0123); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8() local 137 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8() 140 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8() 144 float32x4_t vf0123 = vmulq_f32(vy0123, vr0123); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8()
|
D | neonfma-rr1-lut2048-p1-nr1recps1fma-x12.c | 146 float32x4_t vr0123 = vrecpeq_f32(vd0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12() local 150 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12() 154 vr0123 = vfmaq_f32(vr0123, vr0123, vfmsq_f32(vone, vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12() 159 float32x4_t vf0123 = vmulq_f32(vy0123, vr0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12()
|
D | neonfma-rr1-p5-nr1recps1fma-x16.c | 132 float32x4_t vr0123 = vrecpeq_f32(vd0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x16() local 137 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x16() 142 vr0123 = vfmaq_f32(vr0123, vr0123, vfmsq_f32(vone, vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x16() 148 float32x4_t vf0123 = vmulq_f32(ve0123, vr0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x16()
|
D | neonfma-rr1-p5-nr2fma-x20.c | 145 float32x4_t vr0123 = vrecpeq_f32(vd0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x20() local 151 vr0123 = vfmaq_f32(vr0123, vr0123, vfmsq_f32(vone, vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x20() 157 vr0123 = vfmaq_f32(vr0123, vr0123, vfmsq_f32(vone, vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x20() 164 float32x4_t vf0123 = vmulq_f32(ve0123, vr0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x20()
|
D | neon-rr2-lut64-p2-nr2recps-x8.c | 138 float32x4_t vr0123 = vrecpeq_f32(vd0123); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x8() local 141 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x8() 144 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x8() 148 float32x4_t vf0123 = vmulq_f32(vy0123, vr0123); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x8()
|
D | neonfma-rr1-p5-nr2recps-x16.c | 132 float32x4_t vr0123 = vrecpeq_f32(vd0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16() local 137 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16() 142 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16() 148 float32x4_t vf0123 = vmulq_f32(ve0123, vr0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16()
|
D | neonfma-rr1-p5-nr1recps1fma-x20.c | 145 float32x4_t vr0123 = vrecpeq_f32(vd0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x20() local 151 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x20() 157 vr0123 = vfmaq_f32(vr0123, vr0123, vfmsq_f32(vone, vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x20() 164 float32x4_t vf0123 = vmulq_f32(ve0123, vr0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x20()
|
D | neonfma-rr1-lut64-p2-nr1recps1fma-x12.c | 151 float32x4_t vr0123 = vrecpeq_f32(vd0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x12() local 155 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x12() 159 vr0123 = vfmaq_f32(vr0123, vr0123, vfmsq_f32(vone, vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x12() 164 float32x4_t vf0123 = vmulq_f32(vy0123, vr0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x12()
|