Uses of vminus_log2e in /external/XNNPACK/src/f32-sigmoid/gen/:
D | scalar-lut2048-p1-div-x4.c | in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4():
     31  const float vminus_log2e = -0x1.715476p0f;   (local definition)
     51  float vn0 = vz0 * vminus_log2e + vmagic_bias;
     52  float vn1 = vz1 * vminus_log2e + vmagic_bias;
     53  float vn2 = vz2 * vminus_log2e + vmagic_bias;
     54  float vn3 = vz3 * vminus_log2e + vmagic_bias;
    143  float vn = vz * vminus_log2e + vmagic_bias;

D | scalar-lut64-p2-div-x4.c | in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4():
     31  const float vminus_log2e = -0x1.715476p0f;   (local definition)
     51  float vn0 = vz0 * vminus_log2e + vmagic_bias;
     52  float vn1 = vz1 * vminus_log2e + vmagic_bias;
     53  float vn2 = vz2 * vminus_log2e + vmagic_bias;
     54  float vn3 = vz3 * vminus_log2e + vmagic_bias;
    148  float vn = vz * vminus_log2e + vmagic_bias;

D | scalar-p5-div-x4.c | in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4():
     28  const float vminus_log2e = -0x1.715476p+0f;   (local definition)
     51  float vn0 = vz0 * vminus_log2e + vmagic_bias;
     52  float vn1 = vz1 * vminus_log2e + vmagic_bias;
     53  float vn2 = vz2 * vminus_log2e + vmagic_bias;
     54  float vn3 = vz3 * vminus_log2e + vmagic_bias;
    154  float vn = vz * vminus_log2e + vmagic_bias;

D | scalar-lut2048-p1-div-x2.c | in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x2():
     31  const float vminus_log2e = -0x1.715476p0f;   (local definition)
     47  float vn0 = vz0 * vminus_log2e + vmagic_bias;
     48  float vn1 = vz1 * vminus_log2e + vmagic_bias;
    102  float vn = vz * vminus_log2e + vmagic_bias;

D | scalar-p5-div-x2.c | in xnn_f32_sigmoid_ukernel__scalar_p5_div_x2():
     28  const float vminus_log2e = -0x1.715476p+0f;   (local definition)
     47  float vn0 = vz0 * vminus_log2e + vmagic_bias;
     48  float vn1 = vz1 * vminus_log2e + vmagic_bias;
    109  float vn = vz * vminus_log2e + vmagic_bias;

D | scalar-lut64-p2-div-x2.c | in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x2():
     31  const float vminus_log2e = -0x1.715476p0f;   (local definition)
     47  float vn0 = vz0 * vminus_log2e + vmagic_bias;
     48  float vn1 = vz1 * vminus_log2e + vmagic_bias;
    105  float vn = vz * vminus_log2e + vmagic_bias;

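Every scalar entry above shares one step: vn = vz * vminus_log2e + vmagic_bias rounds vz * -log2(e) to the nearest integer with no explicit rounding instruction (the numbered vn0..vn3 copies are just the x4 unroll). Below is a minimal standalone sketch of that trick; the bias value is the one the p5 kernels use (the LUT variants scale it differently to leave index bits in the low mantissa), and it assumes float math is evaluated in single precision, as on SSE2 or NEON targets.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void) {
      const float vmagic_bias  = 0x1.8000FEp23f;   /* bias as in the p5 kernels */
      const float vminus_log2e = -0x1.715476p+0f;  /* -log2(e) rounded to float */

      const float vz = 3.5f;                       /* stand-in for |x| */
      /* Adding the bias pushes the value into a binade whose ULP is 1.0, so the
         FPU's round-to-nearest discards the fraction of vz * -log2(e). */
      float vn = vz * vminus_log2e + vmagic_bias;

      uint32_t bits;
      memcpy(&bits, &vn, sizeof bits);             /* low mantissa bits now hold the integer */
      vn -= vmagic_bias;                           /* recover it as an ordinary float: -5 here */

      printf("n = %.0f (raw bits 0x%08x)\n", (double) vn, (unsigned) bits);
      return 0;
    }

The kernels keep both forms of the result: the biased bit pattern feeds the exponent reconstruction (or the table index in the LUT variants), and the unbiased vn feeds the subsequent t = z - n*ln(2) reduction.
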
D | neonfma-rr1-p5-div-x24.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x24():
     27  const float32x4_t vminus_log2e = vmovq_n_f32(-0x1.715476p+0f);   (local definition)
     52  float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vz0123, vminus_log2e);
     53  float32x4_t vn4567 = vfmaq_f32(vmagic_bias, vz4567, vminus_log2e);
     54  float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vz89AB, vminus_log2e);
     55  float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vzCDEF, vminus_log2e);
     56  float32x4_t vnGHIJ = vfmaq_f32(vmagic_bias, vzGHIJ, vminus_log2e);
     57  float32x4_t vnKLMN = vfmaq_f32(vmagic_bias, vzKLMN, vminus_log2e);
    169  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);
    195  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);

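The neonfma entries fuse that same multiply and add into one instruction per four lanes; the vn0123 … vnKLMN names just track the lane groups of the wider unrolls. A hedged sketch of the per-vector step (sigmoid_round_step is a hypothetical name for illustration):

    #include <arm_neon.h>

    /* vfmaq_f32(a, b, c) computes a + b*c in one fused step with no intermediate
       rounding, which is why the rr1 kernels can use a single ln(2) constant
       later in the reduction. Hypothetical helper, not an XNNPACK function. */
    static float32x4_t sigmoid_round_step(float32x4_t vz) {
      const float32x4_t vmagic_bias  = vmovq_n_f32(0x1.8000FEp23f);
      const float32x4_t vminus_log2e = vmovq_n_f32(-0x1.715476p+0f);
      return vfmaq_f32(vmagic_bias, vz, vminus_log2e);  /* vn = bias + vz * -log2(e) */
    }
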
D | wasmsimd-p5-div-x24.c | in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x24():
     27  const v128_t vminus_log2e = wasm_f32x4_splat(-0x1.715476p+0f);   (local definition)
     54  v128_t vn0123 = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz0123, vminus_log2e));
     55  v128_t vn4567 = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz4567, vminus_log2e));
     56  v128_t vn89AB = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz89AB, vminus_log2e));
     57  v128_t vnCDEF = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vzCDEF, vminus_log2e));
     58  v128_t vnGHIJ = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vzGHIJ, vminus_log2e));
     59  v128_t vnKLMN = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vzKLMN, vminus_log2e));
    173  v128_t vn = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz, vminus_log2e));
    201  v128_t vn = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz, vminus_log2e));

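Baseline WASM SIMD128 has no fused multiply-add, so the wasmsimd kernels spell the step out as a separate multiply and add. The same sketch under the same assumptions (hypothetical helper name, compiled with clang's -msimd128):

    #include <wasm_simd128.h>

    /* Unfused form: the product is rounded to float before the add, but the
       magic-bias rounding still lands on the nearest integer. */
    static v128_t sigmoid_round_step(v128_t vz) {
      const v128_t vmagic_bias  = wasm_f32x4_splat(0x1.8000FEp23f);
      const v128_t vminus_log2e = wasm_f32x4_splat(-0x1.715476p+0f);
      return wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz, vminus_log2e));
    }
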
D | neonfma-rr1-p5-div-x16.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16():
     27  const float32x4_t vminus_log2e = vmovq_n_f32(-0x1.715476p+0f);   (local definition)
     48  float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vz0123, vminus_log2e);
     49  float32x4_t vn4567 = vfmaq_f32(vmagic_bias, vz4567, vminus_log2e);
     50  float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vz89AB, vminus_log2e);
     51  float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vzCDEF, vminus_log2e);
    133  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);
    159  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);

D | wasmsimd-p5-div-x16.c | in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16():
     27  const v128_t vminus_log2e = wasm_f32x4_splat(-0x1.715476p+0f);   (local definition)
     50  v128_t vn0123 = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz0123, vminus_log2e));
     51  v128_t vn4567 = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz4567, vminus_log2e));
     52  v128_t vn89AB = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz89AB, vminus_log2e));
     53  v128_t vnCDEF = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vzCDEF, vminus_log2e));
    137  v128_t vn = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz, vminus_log2e));
    165  v128_t vn = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz, vminus_log2e));

D | wasmsimd-p5-div-x20.c | in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20():
     27  const v128_t vminus_log2e = wasm_f32x4_splat(-0x1.715476p+0f);   (local definition)
     52  v128_t vn0123 = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz0123, vminus_log2e));
     53  v128_t vn4567 = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz4567, vminus_log2e));
     54  v128_t vn89AB = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz89AB, vminus_log2e));
     55  v128_t vnCDEF = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vzCDEF, vminus_log2e));
     56  v128_t vnGHIJ = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vzGHIJ, vminus_log2e));
    155  v128_t vn = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz, vminus_log2e));
    183  v128_t vn = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz, vminus_log2e));

D | neonfma-rr1-p5-div-x20.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20():
     27  const float32x4_t vminus_log2e = vmovq_n_f32(-0x1.715476p+0f);   (local definition)
     50  float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vz0123, vminus_log2e);
     51  float32x4_t vn4567 = vfmaq_f32(vmagic_bias, vz4567, vminus_log2e);
     52  float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vz89AB, vminus_log2e);
     53  float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vzCDEF, vminus_log2e);
     54  float32x4_t vnGHIJ = vfmaq_f32(vmagic_bias, vzGHIJ, vminus_log2e);
    151  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);
    177  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);

D | neonfma-rr1-p5-div-x12.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12():
     27  const float32x4_t vminus_log2e = vmovq_n_f32(-0x1.715476p+0f);   (local definition)
     46  float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vz0123, vminus_log2e);
     47  float32x4_t vn4567 = vfmaq_f32(vmagic_bias, vz4567, vminus_log2e);
     48  float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vz89AB, vminus_log2e);
    115  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);
    141  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);

D | wasmsimd-p5-div-x12.c | in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x12():
     27  const v128_t vminus_log2e = wasm_f32x4_splat(-0x1.715476p+0f);   (local definition)
     48  v128_t vn0123 = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz0123, vminus_log2e));
     49  v128_t vn4567 = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz4567, vminus_log2e));
     50  v128_t vn89AB = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz89AB, vminus_log2e));
    119  v128_t vn = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz, vminus_log2e));
    147  v128_t vn = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz, vminus_log2e));

D | neonfma-rr1-p5-nr1recps1fma-x20.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x20():
     27  const float32x4_t vminus_log2e = vmovq_n_f32(-0x1.715476p+0f);   (local definition)
     50  float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vz0123, vminus_log2e);
     51  float32x4_t vn4567 = vfmaq_f32(vmagic_bias, vz4567, vminus_log2e);
     52  float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vz89AB, vminus_log2e);
     53  float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vzCDEF, vminus_log2e);
     54  float32x4_t vnGHIJ = vfmaq_f32(vmagic_bias, vzGHIJ, vminus_log2e);
    169  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);
    199  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);

D | neonfma-rr1-p5-nr2recps-x20.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x20():
     27  const float32x4_t vminus_log2e = vmovq_n_f32(-0x1.715476p+0f);   (local definition)
     50  float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vz0123, vminus_log2e);
     51  float32x4_t vn4567 = vfmaq_f32(vmagic_bias, vz4567, vminus_log2e);
     52  float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vz89AB, vminus_log2e);
     53  float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vzCDEF, vminus_log2e);
     54  float32x4_t vnGHIJ = vfmaq_f32(vmagic_bias, vzGHIJ, vminus_log2e);
    169  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);
    199  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);

D | neonfma-rr1-p5-nr2recps-x24.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24():
     27  const float32x4_t vminus_log2e = vmovq_n_f32(-0x1.715476p+0f);   (local definition)
     52  float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vz0123, vminus_log2e);
     53  float32x4_t vn4567 = vfmaq_f32(vmagic_bias, vz4567, vminus_log2e);
     54  float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vz89AB, vminus_log2e);
     55  float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vzCDEF, vminus_log2e);
     56  float32x4_t vnGHIJ = vfmaq_f32(vmagic_bias, vzGHIJ, vminus_log2e);
     57  float32x4_t vnKLMN = vfmaq_f32(vmagic_bias, vzKLMN, vminus_log2e);
    190  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);
    220  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);

D | neonfma-rr1-p5-nr1recps1fma-x24.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x24():
     27  const float32x4_t vminus_log2e = vmovq_n_f32(-0x1.715476p+0f);   (local definition)
     52  float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vz0123, vminus_log2e);
     53  float32x4_t vn4567 = vfmaq_f32(vmagic_bias, vz4567, vminus_log2e);
     54  float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vz89AB, vminus_log2e);
     55  float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vzCDEF, vminus_log2e);
     56  float32x4_t vnGHIJ = vfmaq_f32(vmagic_bias, vzGHIJ, vminus_log2e);
     57  float32x4_t vnKLMN = vfmaq_f32(vmagic_bias, vzKLMN, vminus_log2e);
    190  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);
    220  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);

D | neonfma-rr1-p5-div-x8.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8():
     27  const float32x4_t vminus_log2e = vmovq_n_f32(-0x1.715476p+0f);   (local definition)
     44  float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vz0123, vminus_log2e);
     45  float32x4_t vn4567 = vfmaq_f32(vmagic_bias, vz4567, vminus_log2e);
     97  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);
    123  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);

D | wasmsimd-p5-div-x8.c | in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x8():
     27  const v128_t vminus_log2e = wasm_f32x4_splat(-0x1.715476p+0f);   (local definition)
     46  v128_t vn0123 = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz0123, vminus_log2e));
     47  v128_t vn4567 = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz4567, vminus_log2e));
    101  v128_t vn = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz, vminus_log2e));
    129  v128_t vn = wasm_f32x4_add(vmagic_bias, wasm_f32x4_mul(vz, vminus_log2e));

D | neonfma-rr1-p5-nr2fma-x24.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x24():
     27  const float32x4_t vminus_log2e = vmovq_n_f32(-0x1.715476p+0f);   (local definition)
     52  float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vz0123, vminus_log2e);
     53  float32x4_t vn4567 = vfmaq_f32(vmagic_bias, vz4567, vminus_log2e);
     54  float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vz89AB, vminus_log2e);
     55  float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vzCDEF, vminus_log2e);
     56  float32x4_t vnGHIJ = vfmaq_f32(vmagic_bias, vzGHIJ, vminus_log2e);
     57  float32x4_t vnKLMN = vfmaq_f32(vmagic_bias, vzKLMN, vminus_log2e);
    190  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);
    220  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);

D | neon-rr2-p5-nr2recps-x24.c | in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24():
     27  const float32x4_t vminus_log2e = vmovq_n_f32(-0x1.715476p+0f);   (local definition)
     53  float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vz0123, vminus_log2e);
     54  float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vz4567, vminus_log2e);
     55  float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vz89AB, vminus_log2e);
     56  float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vzCDEF, vminus_log2e);
     57  float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vzGHIJ, vminus_log2e);
     58  float32x4_t vnKLMN = vmlaq_f32(vmagic_bias, vzKLMN, vminus_log2e);
    198  float32x4_t vn = vmlaq_f32(vmagic_bias, vz, vminus_log2e);
    229  float32x4_t vn = vmlaq_f32(vmagic_bias, vz, vminus_log2e);

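The neon-rr2 entries target plain ARMv7 NEON, where vmlaq_f32 multiplies and accumulates with an intermediate rounding rather than fusing. The rr2 in the name signals a two-constant ln(2) range reduction, presumably to win back the accuracy the unfused step gives up relative to the rr1/vfmaq variants. A sketch under the same assumptions as above (hypothetical helper name):

    #include <arm_neon.h>

    /* vmlaq_f32(a, b, c) = a + b*c, with b*c rounded to float before the add. */
    static float32x4_t sigmoid_round_step(float32x4_t vz) {
      const float32x4_t vmagic_bias  = vmovq_n_f32(0x1.8000FEp23f);
      const float32x4_t vminus_log2e = vmovq_n_f32(-0x1.715476p+0f);
      return vmlaq_f32(vmagic_bias, vz, vminus_log2e);
    }
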
D | neon-rr2-p5-nr2recps-x20.c | in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20():
     27  const float32x4_t vminus_log2e = vmovq_n_f32(-0x1.715476p+0f);   (local definition)
     51  float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vz0123, vminus_log2e);
     52  float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vz4567, vminus_log2e);
     53  float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vz89AB, vminus_log2e);
     54  float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vzCDEF, vminus_log2e);
     55  float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vzGHIJ, vminus_log2e);
    176  float32x4_t vn = vmlaq_f32(vmagic_bias, vz, vminus_log2e);
    207  float32x4_t vn = vmlaq_f32(vmagic_bias, vz, vminus_log2e);

D | neonfma-rr1-p5-nr2fma-x20.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x20():
     27  const float32x4_t vminus_log2e = vmovq_n_f32(-0x1.715476p+0f);   (local definition)
     50  float32x4_t vn0123 = vfmaq_f32(vmagic_bias, vz0123, vminus_log2e);
     51  float32x4_t vn4567 = vfmaq_f32(vmagic_bias, vz4567, vminus_log2e);
     52  float32x4_t vn89AB = vfmaq_f32(vmagic_bias, vz89AB, vminus_log2e);
     53  float32x4_t vnCDEF = vfmaq_f32(vmagic_bias, vzCDEF, vminus_log2e);
     54  float32x4_t vnGHIJ = vfmaq_f32(vmagic_bias, vzGHIJ, vminus_log2e);
    169  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);
    199  float32x4_t vn = vfmaq_f32(vmagic_bias, vz, vminus_log2e);

D | neon-rr2-p5-nr2recps-x16.c | in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16():
     27  const float32x4_t vminus_log2e = vmovq_n_f32(-0x1.715476p+0f);   (local definition)
     49  float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vz0123, vminus_log2e);
     50  float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vz4567, vminus_log2e);
     51  float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vz89AB, vminus_log2e);
     52  float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vzCDEF, vminus_log2e);
    154  float32x4_t vn = vmlaq_f32(vmagic_bias, vz, vminus_log2e);
    185  float32x4_t vn = vmlaq_f32(vmagic_bias, vz, vminus_log2e);