References to vminus_log2e under /external/XNNPACK/src/f32-sigmoid/gen/:

scalar-p5-div-x4.c  (all hits in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4)
   31  const float vminus_log2e = -0x1.715476p+0f;   (local)
   68  float vn0 = vz0 * vminus_log2e + vmagic_bias;
   69  float vn1 = vz1 * vminus_log2e + vmagic_bias;
   70  float vn2 = vz2 * vminus_log2e + vmagic_bias;
   71  float vn3 = vz3 * vminus_log2e + vmagic_bias;
  194  float vn = vz * vminus_log2e + vmagic_bias;

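Every hit in this index is the same range-reduction step: vn = vz * vminus_log2e + vmagic_bias rounds z * (-log2 e) to an integer n by adding a large "magic bias", so that exp(-z) can then be reconstructed as 2^n * exp(t) for a small reduced argument t. The plain-C sketch below illustrates that trick under stated assumptions, not as the generated kernel itself: the bias value 0x1.8p23f, the use of expf/ldexpf in place of the kernels' degree-5 ("p5") polynomial and exponent bit manipulation, and the final sign handling are all stand-ins for illustration.

#include <math.h>
#include <stdio.h>

/* Sketch of sigmoid(x) built around the magic-bias range reduction. */
static float sigmoid_magic_bias_sketch(float x) {
  const float vmagic_bias = 0x1.8p23f;         /* 1.5*2^23; assumed stand-in for the kernels' constant */
  const float vminus_log2e = -0x1.715476p+0f;  /* -log2(e), the constant this index tracks */
  const float vln2 = 0x1.62E43p-1f;            /* ln(2) */

  const float vz = fabsf(x);                   /* evaluate exp(-z) with z = |x| */

  /* n := round(z * -log2(e)) -- the line every file in the listing contains. */
  float vn = vz * vminus_log2e + vmagic_bias;
  vn -= vmagic_bias;

  /* Reduced argument t = -z - n*ln2, so exp(-z) = 2^n * exp(t) with |t| <= ln2/2. */
  const float vt = -vz - vn * vln2;

  /* The kernels approximate exp(t) with a degree-5 polynomial ("p5");
   * expf and ldexpf stand in here for the polynomial and the exponent bit trick. */
  const float ve = ldexpf(expf(vt), (int) vn);

  /* sigmoid(x) = exp(-z) / (1 + exp(-z)) for x <= 0, mirrored for x > 0. */
  const float vf = ve / (ve + 1.0f);
  return x > 0.0f ? 1.0f - vf : vf;
}

int main(void) {
  for (float x = -5.0f; x <= 5.0f; x += 2.5f) {
    printf("x=%+.2f  sketch=%.6f  reference=%.6f\n",
           x, sigmoid_magic_bias_sketch(x), 1.0f / (1.0f + expf(-x)));
  }
  return 0;
}

Built with -std=c99 and -lm, the sketch tracks 1.0f / (1.0f + expf(-x)) closely for moderate inputs; the generated kernels additionally fix up very large |x|, where the magic-bias trick and the 2^n scale would otherwise misbehave.
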
psimd-p5-div-x24.c  (all hits in xnn_f32_sigmoid_ukernel__psimd_p5_div_x24)
   30  const psimd_f32 vminus_log2e = psimd_splat_f32(-0x1.715476p+0f);   (local)
   71  psimd_f32 vn0123 = psimd_qfma_f32(vmagic_bias, vz0123, vminus_log2e);
   72  psimd_f32 vn4567 = psimd_qfma_f32(vmagic_bias, vz4567, vminus_log2e);
   73  psimd_f32 vn89AB = psimd_qfma_f32(vmagic_bias, vz89AB, vminus_log2e);
   74  psimd_f32 vnCDEF = psimd_qfma_f32(vmagic_bias, vzCDEF, vminus_log2e);
   75  psimd_f32 vnGHIJ = psimd_qfma_f32(vmagic_bias, vzGHIJ, vminus_log2e);
   76  psimd_f32 vnKLMN = psimd_qfma_f32(vmagic_bias, vzKLMN, vminus_log2e);
  211  psimd_f32 vn = psimd_qfma_f32(vmagic_bias, vz, vminus_log2e);
  270  psimd_f32 vn = psimd_qfma_f32(vmagic_bias, vz, vminus_log2e);

scalar-p5-div-x2.c  (all hits in xnn_f32_sigmoid_ukernel__scalar_p5_div_x2)
   31  const float vminus_log2e = -0x1.715476p+0f;   (local)
   64  float vn0 = vz0 * vminus_log2e + vmagic_bias;
   65  float vn1 = vz1 * vminus_log2e + vmagic_bias;
  151  float vn = vz * vminus_log2e + vmagic_bias;

psimd-p5-div-x16.c  (all hits in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16)
   30  const psimd_f32 vminus_log2e = psimd_splat_f32(-0x1.715476p+0f);   (local)
   67  psimd_f32 vn0123 = psimd_qfma_f32(vmagic_bias, vz0123, vminus_log2e);
   68  psimd_f32 vn4567 = psimd_qfma_f32(vmagic_bias, vz4567, vminus_log2e);
   69  psimd_f32 vn89AB = psimd_qfma_f32(vmagic_bias, vz89AB, vminus_log2e);
   70  psimd_f32 vnCDEF = psimd_qfma_f32(vmagic_bias, vzCDEF, vminus_log2e);
  177  psimd_f32 vn = psimd_qfma_f32(vmagic_bias, vz, vminus_log2e);
  236  psimd_f32 vn = psimd_qfma_f32(vmagic_bias, vz, vminus_log2e);

psimd-p5-div-x20.c  (all hits in xnn_f32_sigmoid_ukernel__psimd_p5_div_x20)
   30  const psimd_f32 vminus_log2e = psimd_splat_f32(-0x1.715476p+0f);   (local)
   69  psimd_f32 vn0123 = psimd_qfma_f32(vmagic_bias, vz0123, vminus_log2e);
   70  psimd_f32 vn4567 = psimd_qfma_f32(vmagic_bias, vz4567, vminus_log2e);
   71  psimd_f32 vn89AB = psimd_qfma_f32(vmagic_bias, vz89AB, vminus_log2e);
   72  psimd_f32 vnCDEF = psimd_qfma_f32(vmagic_bias, vzCDEF, vminus_log2e);
   73  psimd_f32 vnGHIJ = psimd_qfma_f32(vmagic_bias, vzGHIJ, vminus_log2e);
  194  psimd_f32 vn = psimd_qfma_f32(vmagic_bias, vz, vminus_log2e);
  253  psimd_f32 vn = psimd_qfma_f32(vmagic_bias, vz, vminus_log2e);

neonfma-rr1-p5-div-x20.c  (all hits in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20)
   30  const float32x4_t vminus_log2e = vmovq_n_f32(-0x1.715476p+0f);   (local)
   66  float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vz0123, vminus_log2e);
   67  float32x4_t vn4567 = vfmaq_f32(vmagic_bias, vz4567, vminus_log2e);
   68  float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vz89AB, vminus_log2e);
   69  float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vzCDEF, vminus_log2e);
   70  float32x4_t vnGHIJ = vfmaq_f32(vmagic_bias, vzGHIJ, vminus_log2e);
  194  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);
  253  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);

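The neonfma-rr1-* files perform the same step four lanes at a time with a single fused multiply-add, vfmaq_f32(vmagic_bias, vz, vminus_log2e), while the plain-NEON neon-rr2-* files further down use the unfused vmlaq_f32 instead; the rr1/rr2 suffixes appear to distinguish a one-constant reduction from a two-constant (hi/lo) one that compensates for the extra rounding when no FMA is available. A minimal NEON sketch of the vectorized rounding step, again with an assumed stand-in for the bias value:

#include <arm_neon.h>
#include <stdio.h>

/* Four lanes of n = round(z * -log2(e)) via the magic-bias trick. */
static float32x4_t round_scaled_neon(float32x4_t vz) {
  const float32x4_t vmagic_bias = vmovq_n_f32(0x1.8p23f);        /* assumed stand-in for the kernels' vmagic_bias */
  const float32x4_t vminus_log2e = vmovq_n_f32(-0x1.715476p+0f); /* constant shown in the listing */
  /* One fused multiply-add, as in the neonfma-rr1-* hits above. */
  const float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);
  /* Subtract the bias back to recover the per-lane integer n. */
  return vsubq_f32(vn, vmagic_bias);
}

int main(void) {
  const float z[4] = { 0.0f, 0.5f, 2.0f, 10.0f };
  float n[4];
  vst1q_f32(n, round_scaled_neon(vld1q_f32(z)));
  printf("n = %.0f %.0f %.0f %.0f\n", n[0], n[1], n[2], n[3]);  /* expect 0 -1 -3 -14 */
  return 0;
}

The vfmaq_f32 call is the exact form the listing shows; the helper name and demo values are illustrative only.
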
neonfma-rr1-p5-div-x24.c  (all hits in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x24)
   30  const float32x4_t vminus_log2e = vmovq_n_f32(-0x1.715476p+0f);   (local)
   68  float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vz0123, vminus_log2e);
   69  float32x4_t vn4567 = vfmaq_f32(vmagic_bias, vz4567, vminus_log2e);
   70  float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vz89AB, vminus_log2e);
   71  float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vzCDEF, vminus_log2e);
   72  float32x4_t vnGHIJ = vfmaq_f32(vmagic_bias, vzGHIJ, vminus_log2e);
   73  float32x4_t vnKLMN = vfmaq_f32(vmagic_bias, vzKLMN, vminus_log2e);
  212  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);
  271  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);

psimd-p5-div-x12.c  (all hits in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12)
   30  const psimd_f32 vminus_log2e = psimd_splat_f32(-0x1.715476p+0f);   (local)
   65  psimd_f32 vn0123 = psimd_qfma_f32(vmagic_bias, vz0123, vminus_log2e);
   66  psimd_f32 vn4567 = psimd_qfma_f32(vmagic_bias, vz4567, vminus_log2e);
   67  psimd_f32 vn89AB = psimd_qfma_f32(vmagic_bias, vz89AB, vminus_log2e);
  160  psimd_f32 vn = psimd_qfma_f32(vmagic_bias, vz, vminus_log2e);
  219  psimd_f32 vn = psimd_qfma_f32(vmagic_bias, vz, vminus_log2e);

neonfma-rr1-p5-div-x16.c  (all hits in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16)
   30  const float32x4_t vminus_log2e = vmovq_n_f32(-0x1.715476p+0f);   (local)
   64  float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vz0123, vminus_log2e);
   65  float32x4_t vn4567 = vfmaq_f32(vmagic_bias, vz4567, vminus_log2e);
   66  float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vz89AB, vminus_log2e);
   67  float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vzCDEF, vminus_log2e);
  176  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);
  235  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);

neonfma-rr1-p5-div-x12.c  (all hits in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12)
   30  const float32x4_t vminus_log2e = vmovq_n_f32(-0x1.715476p+0f);   (local)
   62  float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vz0123, vminus_log2e);
   63  float32x4_t vn4567 = vfmaq_f32(vmagic_bias, vz4567, vminus_log2e);
   64  float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vz89AB, vminus_log2e);
  158  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);
  217  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);

neonfma-rr1-p5-nr1recps1fma-x24.c  (all hits in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x24)
   30  const float32x4_t vminus_log2e = vmovq_n_f32(-0x1.715476p+0f);   (local)
   68  float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vz0123, vminus_log2e);
   69  float32x4_t vn4567 = vfmaq_f32(vmagic_bias, vz4567, vminus_log2e);
   70  float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vz89AB, vminus_log2e);
   71  float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vzCDEF, vminus_log2e);
   72  float32x4_t vnGHIJ = vfmaq_f32(vmagic_bias, vzGHIJ, vminus_log2e);
   73  float32x4_t vnKLMN = vfmaq_f32(vmagic_bias, vzKLMN, vminus_log2e);
  236  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);
  304  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);

neonfma-rr1-p5-nr2recps-x24.c  (all hits in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24)
   30  const float32x4_t vminus_log2e = vmovq_n_f32(-0x1.715476p+0f);   (local)
   68  float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vz0123, vminus_log2e);
   69  float32x4_t vn4567 = vfmaq_f32(vmagic_bias, vz4567, vminus_log2e);
   70  float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vz89AB, vminus_log2e);
   71  float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vzCDEF, vminus_log2e);
   72  float32x4_t vnGHIJ = vfmaq_f32(vmagic_bias, vzGHIJ, vminus_log2e);
   73  float32x4_t vnKLMN = vfmaq_f32(vmagic_bias, vzKLMN, vminus_log2e);
  236  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);
  304  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);

neonfma-rr1-p5-nr2fma-x24.c  (all hits in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x24)
   30  const float32x4_t vminus_log2e = vmovq_n_f32(-0x1.715476p+0f);   (local)
   68  float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vz0123, vminus_log2e);
   69  float32x4_t vn4567 = vfmaq_f32(vmagic_bias, vz4567, vminus_log2e);
   70  float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vz89AB, vminus_log2e);
   71  float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vzCDEF, vminus_log2e);
   72  float32x4_t vnGHIJ = vfmaq_f32(vmagic_bias, vzGHIJ, vminus_log2e);
   73  float32x4_t vnKLMN = vfmaq_f32(vmagic_bias, vzKLMN, vminus_log2e);
  236  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);
  304  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);

neonfma-rr1-p5-nr1recps1fma-x20.c  (all hits in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x20)
   30  const float32x4_t vminus_log2e = vmovq_n_f32(-0x1.715476p+0f);   (local)
   66  float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vz0123, vminus_log2e);
   67  float32x4_t vn4567 = vfmaq_f32(vmagic_bias, vz4567, vminus_log2e);
   68  float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vz89AB, vminus_log2e);
   69  float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vzCDEF, vminus_log2e);
   70  float32x4_t vnGHIJ = vfmaq_f32(vmagic_bias, vzGHIJ, vminus_log2e);
  215  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);
  283  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);

neonfma-rr1-p5-nr2recps-x20.c  (all hits in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x20)
   30  const float32x4_t vminus_log2e = vmovq_n_f32(-0x1.715476p+0f);   (local)
   66  float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vz0123, vminus_log2e);
   67  float32x4_t vn4567 = vfmaq_f32(vmagic_bias, vz4567, vminus_log2e);
   68  float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vz89AB, vminus_log2e);
   69  float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vzCDEF, vminus_log2e);
   70  float32x4_t vnGHIJ = vfmaq_f32(vmagic_bias, vzGHIJ, vminus_log2e);
  215  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);
  283  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);

neonfma-rr1-p5-nr2recps-x16.c  (all hits in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16)
   30  const float32x4_t vminus_log2e = vmovq_n_f32(-0x1.715476p+0f);   (local)
   64  float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vz0123, vminus_log2e);
   65  float32x4_t vn4567 = vfmaq_f32(vmagic_bias, vz4567, vminus_log2e);
   66  float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vz89AB, vminus_log2e);
   67  float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vzCDEF, vminus_log2e);
  194  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);
  262  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);

psimd-p5-div-x8.c  (all hits in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8)
   30  const psimd_f32 vminus_log2e = psimd_splat_f32(-0x1.715476p+0f);   (local)
   63  psimd_f32 vn0123 = psimd_qfma_f32(vmagic_bias, vz0123, vminus_log2e);
   64  psimd_f32 vn4567 = psimd_qfma_f32(vmagic_bias, vz4567, vminus_log2e);
  143  psimd_f32 vn = psimd_qfma_f32(vmagic_bias, vz, vminus_log2e);
  202  psimd_f32 vn = psimd_qfma_f32(vmagic_bias, vz, vminus_log2e);

neonfma-rr1-p5-div-x8.c  (all hits in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8)
   30  const float32x4_t vminus_log2e = vmovq_n_f32(-0x1.715476p+0f);   (local)
   60  float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vz0123, vminus_log2e);
   61  float32x4_t vn4567 = vfmaq_f32(vmagic_bias, vz4567, vminus_log2e);
  140  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);
  199  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);

neon-rr2-p5-nr2recps-x24.c  (all hits in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24)
   30  const float32x4_t vminus_log2e = vmovq_n_f32(-0x1.715476p+0f);   (local)
   70  float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vz0123, vminus_log2e);
   71  float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vz4567, vminus_log2e);
   72  float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vz89AB, vminus_log2e);
   73  float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vzCDEF, vminus_log2e);
   74  float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vzGHIJ, vminus_log2e);
   75  float32x4_t vnKLMN = vmlaq_f32(vmagic_bias, vzKLMN, vminus_log2e);
  246  float32x4_t vn = vmlaq_f32(vmagic_bias, vz, vminus_log2e);
  316  float32x4_t vn = vmlaq_f32(vmagic_bias, vz, vminus_log2e);

neon-rr2-p5-nr2recps-x20.c  (all hits in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20)
   30  const float32x4_t vminus_log2e = vmovq_n_f32(-0x1.715476p+0f);   (local)
   68  float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vz0123, vminus_log2e);
   69  float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vz4567, vminus_log2e);
   70  float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vz89AB, vminus_log2e);
   71  float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vzCDEF, vminus_log2e);
   72  float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vzGHIJ, vminus_log2e);
  224  float32x4_t vn = vmlaq_f32(vmagic_bias, vz, vminus_log2e);
  294  float32x4_t vn = vmlaq_f32(vmagic_bias, vz, vminus_log2e);

neonfma-rr1-p5-nr2fma-x20.c  (all hits in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x20)
   30  const float32x4_t vminus_log2e = vmovq_n_f32(-0x1.715476p+0f);   (local)
   66  float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vz0123, vminus_log2e);
   67  float32x4_t vn4567 = vfmaq_f32(vmagic_bias, vz4567, vminus_log2e);
   68  float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vz89AB, vminus_log2e);
   69  float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vzCDEF, vminus_log2e);
   70  float32x4_t vnGHIJ = vfmaq_f32(vmagic_bias, vzGHIJ, vminus_log2e);
  215  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);
  283  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);

neon-rr2-p5-nr2recps-x16.c  (all hits in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16)
   30  const float32x4_t vminus_log2e = vmovq_n_f32(-0x1.715476p+0f);   (local)
   66  float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vz0123, vminus_log2e);
   67  float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vz4567, vminus_log2e);
   68  float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vz89AB, vminus_log2e);
   69  float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vzCDEF, vminus_log2e);
  202  float32x4_t vn = vmlaq_f32(vmagic_bias, vz, vminus_log2e);
  272  float32x4_t vn = vmlaq_f32(vmagic_bias, vz, vminus_log2e);

neonfma-rr1-p5-nr1recps1fma-x16.c  (all hits in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x16)
   30  const float32x4_t vminus_log2e = vmovq_n_f32(-0x1.715476p+0f);   (local)
   64  float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vz0123, vminus_log2e);
   65  float32x4_t vn4567 = vfmaq_f32(vmagic_bias, vz4567, vminus_log2e);
   66  float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vz89AB, vminus_log2e);
   67  float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vzCDEF, vminus_log2e);
  194  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);
  262  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);

neonfma-rr1-p5-nr2fma-x16.c  (all hits in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16)
   30  const float32x4_t vminus_log2e = vmovq_n_f32(-0x1.715476p+0f);   (local)
   64  float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vz0123, vminus_log2e);
   65  float32x4_t vn4567 = vfmaq_f32(vmagic_bias, vz4567, vminus_log2e);
   66  float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vz89AB, vminus_log2e);
   67  float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vzCDEF, vminus_log2e);
  194  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);
  262  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);

neonfma-rr1-p5-nr2fma-x12.c  (all hits in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12)
   30  const float32x4_t vminus_log2e = vmovq_n_f32(-0x1.715476p+0f);   (local)
   62  float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vz0123, vminus_log2e);
   63  float32x4_t vn4567 = vfmaq_f32(vmagic_bias, vz4567, vminus_log2e);
   64  float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vz89AB, vminus_log2e);
  173  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);
  241  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);