/external/XNNPACK/src/f32-sigmoid/gen/ |
D | neonfma-rr1-p5-nr2recps-x24.c | 136 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24() 137 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24() 138 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24() 139 vrCDEF = vmulq_f32(vrCDEF, vrecpsq_f32(vrCDEF, vdCDEF)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24() 140 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24() 141 vrKLMN = vmulq_f32(vrKLMN, vrecpsq_f32(vrKLMN, vdKLMN)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24() 143 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24() 144 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24() 145 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24() 146 vrCDEF = vmulq_f32(vrCDEF, vrecpsq_f32(vrCDEF, vdCDEF)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24() [all …]
|
D | neonfma-rr1-p5-nr2recps-x20.c | 122 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x20() 123 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x20() 124 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x20() 125 vrCDEF = vmulq_f32(vrCDEF, vrecpsq_f32(vrCDEF, vdCDEF)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x20() 126 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x20() 128 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x20() 129 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x20() 130 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x20() 131 vrCDEF = vmulq_f32(vrCDEF, vrecpsq_f32(vrCDEF, vdCDEF)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x20() 132 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x20() [all …]
|
D | neon-rr2-p5-nr2recps-x24.c | 144 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() 145 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() 146 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() 147 vrCDEF = vmulq_f32(vrCDEF, vrecpsq_f32(vrCDEF, vdCDEF)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() 148 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() 149 vrKLMN = vmulq_f32(vrKLMN, vrecpsq_f32(vrKLMN, vdKLMN)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() 151 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() 152 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() 153 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() 154 vrCDEF = vmulq_f32(vrCDEF, vrecpsq_f32(vrCDEF, vdCDEF)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() [all …]
|
D | neon-rr2-p5-nr2recps-x20.c | 129 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() 130 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() 131 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() 132 vrCDEF = vmulq_f32(vrCDEF, vrecpsq_f32(vrCDEF, vdCDEF)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() 133 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() 135 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() 136 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() 137 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() 138 vrCDEF = vmulq_f32(vrCDEF, vrecpsq_f32(vrCDEF, vdCDEF)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() 139 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() [all …]
|
D | neon-rr2-p5-nr2recps-x16.c | 114 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() 115 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() 116 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() 117 vrCDEF = vmulq_f32(vrCDEF, vrecpsq_f32(vrCDEF, vdCDEF)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() 119 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() 120 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() 121 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() 122 vrCDEF = vmulq_f32(vrCDEF, vrecpsq_f32(vrCDEF, vdCDEF)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() 170 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() 171 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() [all …]
|
D | neonfma-rr1-p5-nr2recps-x16.c | 108 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16() 109 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16() 110 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16() 111 vrCDEF = vmulq_f32(vrCDEF, vrecpsq_f32(vrCDEF, vdCDEF)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16() 113 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16() 114 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16() 115 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16() 116 vrCDEF = vmulq_f32(vrCDEF, vrecpsq_f32(vrCDEF, vdCDEF)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16() 163 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16() 164 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16() [all …]
|
D | neon-rr2-p5-nr2recps-x12.c | 99 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 100 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 101 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 103 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 104 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 105 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 148 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 149 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 179 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 180 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12()
|
D | neonfma-rr1-p5-nr2recps-x12.c | 94 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() 95 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() 96 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() 98 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() 99 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() 100 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() 142 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() 143 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() 172 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() 173 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12()
|
D | neonfma-rr1-lut2048-p1-nr2recps-x20.c | 146 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20() 147 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20() 148 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20() 149 vrCDEF = vmulq_f32(vrCDEF, vrecpsq_f32(vrCDEF, vdCDEF)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20() 150 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20() 152 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20() 153 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20() 154 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20() 155 vrCDEF = vmulq_f32(vrCDEF, vrecpsq_f32(vrCDEF, vdCDEF)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20() 156 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20() [all …]
|
D | neonfma-rr1-lut64-p2-nr2recps-x24.c | 173 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() 174 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() 175 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() 176 vrCDEF = vmulq_f32(vrCDEF, vrecpsq_f32(vrCDEF, vdCDEF)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() 177 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() 178 vrKLMN = vmulq_f32(vrKLMN, vrecpsq_f32(vrKLMN, vdKLMN)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() 180 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() 181 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() 182 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() 183 vrCDEF = vmulq_f32(vrCDEF, vrecpsq_f32(vrCDEF, vdCDEF)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() [all …]
|
D | neon-rr2-lut2048-p1-nr2recps-x24.c | 173 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24() 174 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24() 175 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24() 176 vrCDEF = vmulq_f32(vrCDEF, vrecpsq_f32(vrCDEF, vdCDEF)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24() 177 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24() 178 vrKLMN = vmulq_f32(vrKLMN, vrecpsq_f32(vrKLMN, vdKLMN)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24() 180 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24() 181 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24() 182 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24() 183 vrCDEF = vmulq_f32(vrCDEF, vrecpsq_f32(vrCDEF, vdCDEF)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24() [all …]
|
D | neon-rr2-lut64-p2-nr2recps-x24.c | 181 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24() 182 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24() 183 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24() 184 vrCDEF = vmulq_f32(vrCDEF, vrecpsq_f32(vrCDEF, vdCDEF)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24() 185 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24() 186 vrKLMN = vmulq_f32(vrKLMN, vrecpsq_f32(vrKLMN, vdKLMN)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24() 188 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24() 189 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24() 190 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24() 191 vrCDEF = vmulq_f32(vrCDEF, vrecpsq_f32(vrCDEF, vdCDEF)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24() [all …]
|
D | neonfma-rr1-lut2048-p1-nr2recps-x24.c | 165 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24() 166 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24() 167 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24() 168 vrCDEF = vmulq_f32(vrCDEF, vrecpsq_f32(vrCDEF, vdCDEF)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24() 169 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24() 170 vrKLMN = vmulq_f32(vrKLMN, vrecpsq_f32(vrKLMN, vdKLMN)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24() 172 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24() 173 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24() 174 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24() 175 vrCDEF = vmulq_f32(vrCDEF, vrecpsq_f32(vrCDEF, vdCDEF)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24() [all …]
|
D | neonfma-rr1-lut2048-p1-nr2recps-x16.c | 127 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x16() 128 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x16() 129 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x16() 130 vrCDEF = vmulq_f32(vrCDEF, vrecpsq_f32(vrCDEF, vdCDEF)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x16() 132 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x16() 133 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x16() 134 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x16() 135 vrCDEF = vmulq_f32(vrCDEF, vrecpsq_f32(vrCDEF, vdCDEF)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x16() 189 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x16() 190 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x16() [all …]
|
D | neonfma-rr1-lut64-p2-nr2recps-x20.c | 153 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x20() 154 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x20() 155 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x20() 156 vrCDEF = vmulq_f32(vrCDEF, vrecpsq_f32(vrCDEF, vdCDEF)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x20() 157 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x20() 159 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x20() 160 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x20() 161 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x20() 162 vrCDEF = vmulq_f32(vrCDEF, vrecpsq_f32(vrCDEF, vdCDEF)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x20() 163 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x20() [all …]
|
D | neon-rr2-lut64-p2-nr2recps-x20.c | 160 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x20() 161 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x20() 162 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x20() 163 vrCDEF = vmulq_f32(vrCDEF, vrecpsq_f32(vrCDEF, vdCDEF)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x20() 164 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x20() 166 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x20() 167 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x20() 168 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x20() 169 vrCDEF = vmulq_f32(vrCDEF, vrecpsq_f32(vrCDEF, vdCDEF)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x20() 170 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x20() [all …]
|
D | neon-rr2-lut2048-p1-nr2recps-x20.c | 153 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x20() 154 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x20() 155 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x20() 156 vrCDEF = vmulq_f32(vrCDEF, vrecpsq_f32(vrCDEF, vdCDEF)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x20() 157 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x20() 159 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x20() 160 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x20() 161 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x20() 162 vrCDEF = vmulq_f32(vrCDEF, vrecpsq_f32(vrCDEF, vdCDEF)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x20() 163 vrGHIJ = vmulq_f32(vrGHIJ, vrecpsq_f32(vrGHIJ, vdGHIJ)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x20() [all …]
|
D | neon-rr2-p5-nr2recps-x8.c | 84 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8() 85 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8() 87 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8() 88 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8() 126 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8() 127 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8() 157 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8() 158 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8()
|
D | neonfma-rr1-p5-nr2recps-x8.c | 80 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8() 81 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8() 83 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8() 84 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8() 121 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8() 122 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8() 151 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8() 152 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8()
|
D | neonfma-rr1-lut2048-p1-nr2recps-x12.c | 108 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12() 109 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12() 110 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12() 112 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12() 113 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12() 114 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12() 163 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12() 164 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12() 200 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12() 201 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12()
|
D | neonfma-rr1-lut64-p2-nr2recps-x12.c | 113 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x12() 114 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x12() 115 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x12() 117 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x12() 118 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x12() 119 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x12() 169 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x12() 170 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x12() 207 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x12() 208 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x12()
|
D | neon-rr2-lut2048-p1-nr2recps-x12.c | 113 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12() 114 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12() 115 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12() 117 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12() 118 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12() 119 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12() 169 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12() 170 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12() 207 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12() 208 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12()
|
D | neon-rr2-lut2048-p1-nr2recps-x16.c | 133 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x16() 134 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x16() 135 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x16() 136 vrCDEF = vmulq_f32(vrCDEF, vrecpsq_f32(vrCDEF, vdCDEF)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x16() 138 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x16() 139 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x16() 140 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x16() 141 vrCDEF = vmulq_f32(vrCDEF, vrecpsq_f32(vrCDEF, vdCDEF)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x16() 196 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x16() 197 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x16() [all …]
|
D | neon-rr2-lut64-p2-nr2recps-x16.c | 139 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x16() 140 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x16() 141 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x16() 142 vrCDEF = vmulq_f32(vrCDEF, vrecpsq_f32(vrCDEF, vdCDEF)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x16() 144 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x16() 145 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x16() 146 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x16() 147 vrCDEF = vmulq_f32(vrCDEF, vrecpsq_f32(vrCDEF, vdCDEF)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x16() 203 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x16() 204 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x16() [all …]
|
D | neonfma-rr1-lut64-p2-nr2recps-x16.c | 133 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x16() 134 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x16() 135 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x16() 136 vrCDEF = vmulq_f32(vrCDEF, vrecpsq_f32(vrCDEF, vdCDEF)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x16() 138 vr0123 = vmulq_f32(vr0123, vrecpsq_f32(vr0123, vd0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x16() 139 vr4567 = vmulq_f32(vr4567, vrecpsq_f32(vr4567, vd4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x16() 140 vr89AB = vmulq_f32(vr89AB, vrecpsq_f32(vr89AB, vd89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x16() 141 vrCDEF = vmulq_f32(vrCDEF, vrecpsq_f32(vrCDEF, vdCDEF)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x16() 196 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x16() 197 vr = vmulq_f32(vr, vrecpsq_f32(vr, vd)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x16() [all …]
|