/external/XNNPACK/src/f32-velu/gen/

D | velu-scalar-rr2-lut16-p3-x2.c | in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2():
      61  float vs0 = fp32_from_bits(xnn_table_exp2minus_k_over_16[vidx0] + ven0);   (local)
      67  vs0 = 0.0f;
      82  vt0 *= vs0;
      83  vs0 -= vone;
      90  const float ve0 = (vp0 + vs0) * valpha;

D | velu-scalar-rr2-p6-x2.c | in xnn_f32_velu_ukernel__scalar_rr2_p6_x2():
      53  float vs0 = fp32_from_bits(fp32_to_bits(vn0) << 23);   (local)
      65  vs0 = 0.0f;
      88  vt0 *= vs0;
      89  vs0 -= vone;
      96  const float ve0 = (vp0 + vs0) * valpha;

D | velu-scalar-rr2-p6-x3.c | in xnn_f32_velu_ukernel__scalar_rr2_p6_x3():
      56  float vs0 = fp32_from_bits(fp32_to_bits(vn0) << 23);   (local)
      72  vs0 = 0.0f;
     104  vt0 *= vs0;
     105  vs0 -= vone;
     115  const float ve0 = (vp0 + vs0) * valpha;

D | velu-scalar-rr2-lut16-p3-x3.c | in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3():
      67  float vs0 = fp32_from_bits(xnn_table_exp2minus_k_over_16[vidx0] + ven0);   (local)
      75  vs0 = 0.0f;
      97  vt0 *= vs0;
      98  vs0 -= vone;
     108  const float ve0 = (vp0 + vs0) * valpha;

D | velu-wasm-rr2-lut16-p3-x2.c | in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x2():
      61  float vs0 = fp32_from_bits(xnn_table_exp2minus_k_over_16[vidx0] + ven0);   (local)
      74  vt0 *= vs0;
      75  vs0 -= vone;
      82  const float ve0 = (vp0 + vs0) * valpha;

D | velu-scalar-rr2-lut16-p3-x4.c | in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4():
      73  float vs0 = fp32_from_bits(xnn_table_exp2minus_k_over_16[vidx0] + ven0);   (local)
      83  vs0 = 0.0f;
     112  vt0 *= vs0;
     113  vs0 -= vone;
     126  const float ve0 = (vp0 + vs0) * valpha;

D | velu-wasm-rr2-p6-x2.c | in xnn_f32_velu_ukernel__wasm_rr2_p6_x2():
      53  float vs0 = fp32_from_bits(fp32_to_bits(vn0) << 23);   (local)
      80  vt0 *= vs0;
      81  vs0 -= vone;
      88  const float ve0 = (vp0 + vs0) * valpha;

D | velu-scalar-rr2-p6-x4.c | in xnn_f32_velu_ukernel__scalar_rr2_p6_x4():
      59  float vs0 = fp32_from_bits(fp32_to_bits(vn0) << 23);   (local)
      79  vs0 = 0.0f;
     120  vt0 *= vs0;
     121  vs0 -= vone;
     134  const float ve0 = (vp0 + vs0) * valpha;

D | velu-avx512f-rr1-lut16-p3-perm-x32.c | in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32():
      55  __m512 vs0 = _mm512_castsi512_ps(_mm512_add_epi32(vl0, ven0));   (local)
      67  vt0 = _mm512_mul_ps(vt0, vs0);
      71  vs0 = _mm512_fmsub_ps(vs0, valpha, valpha);
      78  __m512 vy0 = _mm512_fmadd_ps(vp0, valpha, vs0);

D | velu-scalar-rr2-lut16-p3-x5.c | in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5():
      79  float vs0 = fp32_from_bits(xnn_table_exp2minus_k_over_16[vidx0] + ven0);   (local)
      91  vs0 = 0.0f;
     127  vt0 *= vs0;
     128  vs0 -= vone;
     144  const float ve0 = (vp0 + vs0) * valpha;

D | velu-wasm-rr2-p6-x3.c | in xnn_f32_velu_ukernel__wasm_rr2_p6_x3():
      56  float vs0 = fp32_from_bits(fp32_to_bits(vn0) << 23);   (local)
      92  vt0 *= vs0;
      93  vs0 -= vone;
     103  const float ve0 = (vp0 + vs0) * valpha;

D | velu-wasm-rr2-lut16-p3-x3.c | in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3():
      67  float vs0 = fp32_from_bits(xnn_table_exp2minus_k_over_16[vidx0] + ven0);   (local)
      85  vt0 *= vs0;
      86  vs0 -= vone;
      96  const float ve0 = (vp0 + vs0) * valpha;

D | velu-avx2-rr1-p6-x16.c | in xnn_f32_velu_ukernel__avx2_rr1_p6_x16():
      50  __m256 vs0 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(vn0), 23));   (local)
      71  vt0 = _mm256_mul_ps(vt0, vs0);
      75  vs0 = _mm256_fmsub_ps(vs0, valpha, valpha);
      80  const __m256 ve0 = _mm256_fmadd_ps(vp0, valpha, vs0);

D | velu-avx2-rr1-lut4-p4-perm-x16.c | in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x16():
      56  __m256 vs0 = _mm256_castsi256_ps(_mm256_add_epi32(vl0, ven0));   (local)
      68  vt0 = _mm256_mul_ps(vt0, vs0);
      72  vs0 = _mm256_fmsub_ps(vs0, valpha, valpha);
      77  const __m256 ve0 = _mm256_fmadd_ps(vp0, valpha, vs0);

D | velu-scalar-rr2-p6-x5.c | in xnn_f32_velu_ukernel__scalar_rr2_p6_x5():
      62  float vs0 = fp32_from_bits(fp32_to_bits(vn0) << 23);   (local)
      86  vs0 = 0.0f;
     136  vt0 *= vs0;
     137  vs0 -= vone;
     153  const float ve0 = (vp0 + vs0) * valpha;

D | velu-avx512f-rr1-p6-x32.c | in xnn_f32_velu_ukernel__avx512f_rr1_p6_x32():
      52  __m512 vs0 = _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_castps_si512(vn0), 23));   (local)
      73  vt0 = _mm512_mul_ps(vt0, vs0);
      77  vs0 = _mm512_fmsub_ps(vs0, valpha, valpha);
      84  __m512 vy0 = _mm512_fmadd_ps(vp0, valpha, vs0);

D | velu-avx2-rr1-lut8-p4-perm-x16.c | in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16():
      56  __m256 vs0 = _mm256_castsi256_ps(_mm256_add_epi32(vl0, ven0));   (local)
      68  vt0 = _mm256_mul_ps(vt0, vs0);
      72  vs0 = _mm256_fmsub_ps(vs0, valpha, valpha);
      77  const __m256 ve0 = _mm256_fmadd_ps(vp0, valpha, vs0);

D | velu-avx2-rr1-lut16-p3-gather-x16.c | in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16():
      60  __m256 vs0 = _mm256_castsi256_ps(_mm256_add_epi32(vl0, ven0));   (local)
      69  vt0 = _mm256_mul_ps(vt0, vs0);
      73  vs0 = _mm256_fmsub_ps(vs0, valpha, valpha);
      78  const __m256 ve0 = _mm256_fmadd_ps(vp0, valpha, vs0);

D | velu-scalar-rr2-lut16-p3-x6.c | in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6():
      85  float vs0 = fp32_from_bits(xnn_table_exp2minus_k_over_16[vidx0] + ven0);   (local)
      99  vs0 = 0.0f;
     142  vt0 *= vs0;
     143  vs0 -= vone;
     162  const float ve0 = (vp0 + vs0) * valpha;

D | velu-wasm-rr2-p6-x4.c | in xnn_f32_velu_ukernel__wasm_rr2_p6_x4():
      59  float vs0 = fp32_from_bits(fp32_to_bits(vn0) << 23);   (local)
     104  vt0 *= vs0;
     105  vs0 -= vone;
     118  const float ve0 = (vp0 + vs0) * valpha;
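Across all of the files above, vs0 is the scale factor 2^n produced by the exp2 range reduction: the p6 variants build it by shifting the rounded exponent bits of vn0 straight into the exponent field, the lut variants add exponent bits to a table entry, and the ELU output is then assembled through the vt0 *= vs0; vs0 -= vone; (vp0 + vs0) * valpha sequence visible in the snippets (the AVX variants fold the subtraction and the alpha multiply into fmsub/fmadd). The following is a minimal scalar sketch of that shared pattern, assuming the standard magic-bias construction; it is not code taken from the generated kernels, elu_neg_sketch is a made-up name, and the polynomial coefficients are plain Taylor terms rather than XNNPACK's tuned minimax constants.

```c
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static inline uint32_t fp32_to_bits(float f) { uint32_t w; memcpy(&w, &f, sizeof w); return w; }
static inline float fp32_from_bits(uint32_t w) { float f; memcpy(&f, &w, sizeof f); return f; }

/* alpha * (exp(x) - 1) for x <= 0, mirroring the structure seen in the listing.
 * Assumption: the magic bias and the two-piece ln2 split follow the usual
 * construction; the coefficients below are Taylor terms, for illustration only. */
static float elu_neg_sketch(float x, float alpha) {
  const float vmagic_bias   = 0x1.8000FEp23f;   /* low bits of vn end up as 127 + n */
  const float vlog2e        = 0x1.715476p+0f;   /* log2(e)                          */
  const float vminus_ln2_hi = -0x1.62E430p-1f;  /* -ln2, split in two for accuracy  */
  const float vminus_ln2_lo =  0x1.05C61p-29f;
  const float vone = 1.0f;

  float vn = x * vlog2e + vmagic_bias;                 /* n = round(x*log2e) in the low bits */
  float vs = fp32_from_bits(fp32_to_bits(vn) << 23);   /* vs = 2^n, as in the p6 files       */
  vn -= vmagic_bias;                                   /* recover n as a float               */

  float vt = vn * vminus_ln2_hi + x;   /* t = x - n*ln2, done in two steps   */
  vt = vn * vminus_ln2_lo + vt;        /* (the "rr2" in the kernel names)    */

  float vp = 1.0f / 720.0f;            /* p = t*(c2 + c3*t + ... + c6*t^4)   */
  vp = vp * vt + 1.0f / 120.0f;
  vp = vp * vt + 1.0f / 24.0f;
  vp = vp * vt + 1.0f / 6.0f;
  vp = vp * vt + 0.5f;
  vp *= vt;

  vt *= vs;                            /* the sequence from the snippets:    */
  vs -= vone;                          /*   vt = s*t, vs = s - 1             */
  vp = vp * vt + vt;                   /*   vp = s*t*(1 + t*P(t)) ~ s*e^t - s */
  return (vp + vs) * alpha;            /* (s*e^t - s) + (s - 1) = e^x - 1    */
}

int main(void) {
  for (float x = -4.0f; x <= 0.0f; x += 1.0f) {
    printf("x=%+.1f  sketch=%.7f  expm1f=%.7f\n", x, elu_neg_sketch(x, 1.0f), expm1f(x));
  }
  return 0;
}
```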
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/

D | scalar-rr2-lut64-p2-x2.c | in xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_x2():
      78  const float vs0 = fp32_from_bits(xnn_table_exp2_k_over_64[vidx0] + ve0);   (local)
     105  float vf0 = vp0 * vs0 + vs0;

D | scalar-rr2-p5-x2.c | in xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_x2():
      61  const float vs0 = fp32_from_bits(fp32_to_bits(vn0) << 23);   (local)
      93  vt0 *= vs0;
      96  float vf0 = vt0 * vp0 + vs0;

D | scalar-rr2-p5-x2-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_p5_x2_acc2():
      62  const float vs0 = fp32_from_bits(fp32_to_bits(vn0) << 23);   (local)
      94  vt0 *= vs0;
      97  float vf0 = vt0 * vp0 + vs0;

D | scalar-rr2-lut64-p2-x2-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__scalar_rr2_lut64_p2_x2_acc2():
      79  const float vs0 = fp32_from_bits(xnn_table_exp2_k_over_64[vidx0] + ve0);   (local)
     106  float vf0 = vp0 * vs0 + vs0;
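In this directory vs0 is again the 2^n scale, but the lut64 variants split n into a quotient, which lands in the exponent field, and a remainder, which indexes the 64-entry 2^(k/64) table; the listed line vf0 = vp0 * vs0 + vs0 then reconstructs s * (1 + t*p(t)) ~ e^(x - max), which the ukernel sums and stores (hence "raddstoreexpminusmax"). The sketch below shows one way such a table-plus-exponent reconstruction can be put together; the bias constant, the masking, the runtime-built table, and the name exp_lut64_sketch are my own assumptions, not the generated code.

```c
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static inline uint32_t fp32_to_bits(float f) { uint32_t w; memcpy(&w, &f, sizeof w); return w; }
static inline float fp32_from_bits(uint32_t w) { float f; memcpy(&f, &w, sizeof f); return f; }

static uint32_t table_exp2_k_over_64[64];  /* bit patterns of 2^(k/64), k = 0..63 */

/* exp(x) via a 64-entry table: n = round(64*x*log2e) splits as n = 64*q + r;
 * 2^(n/64) = 2^q * 2^(r/64), where the table supplies 2^(r/64) and q is added
 * into the exponent field.  Only a degree-2 polynomial is needed afterwards. */
static float exp_lut64_sketch(float x) {
  const float vmagic_bias   = 0x1.800000p17f;   /* ulp is 1/64 inside [2^17, 2^18) */
  const float vlog2e        = 0x1.715476p+0f;
  const float vminus_ln2_hi = -0x1.62E430p-1f;
  const float vminus_ln2_lo =  0x1.05C61p-29f;

  float vn = x * vlog2e + vmagic_bias;          /* low bits hold 0x400000 + n      */
  const uint32_t vb   = fp32_to_bits(vn);
  const uint32_t vidx = vb & 0x3F;                           /* r = n mod 64       */
  const uint32_t vexp = (vb & UINT32_C(0xFFFFFFC0)) << 17;   /* q << 23 (mod 2^32) */
  const float vs = fp32_from_bits(table_exp2_k_over_64[vidx] + vexp);  /* 2^(n/64) */
  vn -= vmagic_bias;                            /* n/64 as a float                 */

  float vt = vn * vminus_ln2_hi + x;            /* t = x - (n/64)*ln2, |t| <= ln2/128 */
  vt = vn * vminus_ln2_lo + vt;

  float vp = vt * 0.5f;                         /* p = t + t^2/2                   */
  vp = vp * vt + vt;
  return vp * vs + vs;                          /* cf. vf0 = vp0 * vs0 + vs0       */
}

int main(void) {
  for (int k = 0; k < 64; k++) {
    table_exp2_k_over_64[k] = fp32_to_bits(exp2f((float) k / 64.0f));
  }
  /* softmax-denominator style use: exponentiate max-subtracted inputs, keep the sum */
  const float vx[4] = {-1.0f, -2.5f, -4.0f, 0.0f};
  float vsum = 0.0f;
  for (int i = 0; i < 4; i++) {
    const float vf = exp_lut64_sketch(vx[i]);
    printf("exp(%+.1f)  sketch=%.7f  expf=%.7f\n", vx[i], vf, expf(vx[i]));
    vsum += vf;
  }
  printf("sum = %.7f\n", vsum);
  return 0;
}
```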
/external/XNNPACK/src/f32-vsigmoid/gen/

D | vsigmoid-scalar-rr2-lut2048-p1-div-x2.c | in xnn_f32_vsigmoid_ukernel__scalar_rr2_lut2048_p1_div_x2():
      54  const float vs0 = fp32_from_bits(xnn_table_exp2minus_k_over_2048[vidx0] + ve0);   (local)
      70  const float vy0 = vp0 * vs0 + vs0;
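The sigmoid kernel applies the same scale trick (here with a 2048-entry table) to evaluate e^z for z = -|x|; the listed line vy0 = vp0 * vs0 + vs0 is that e^z reconstruction, which then feeds the division e^z / (1 + e^z) implied by the -div suffix. The sketch below shows only this outer structure, with expf standing in for the vs0/vp0 reconstruction to keep it short; sigmoid_sketch is a made-up name.

```c
#include <math.h>
#include <stdio.h>

/* sigmoid(x) = e^z / (1 + e^z) with z = -|x|, mirrored to 1 - f for x > 0.
 * In the listed kernel e^z is built as vy0 = vp0 * vs0 + vs0 (table scale times
 * a short polynomial); expf stands in for that reconstruction here. */
static float sigmoid_sketch(float x) {
  const float vz = -fabsf(x);          /* work on the non-positive half       */
  const float ve = expf(vz);           /* placeholder for the vs0/vp0 rebuild */
  const float vd = ve + 1.0f;          /* denominator                         */
  float vf = ve / vd;                  /* sigmoid of the non-positive input   */
  if (x > 0.0f) {
    vf = 1.0f - vf;                    /* mirror back for positive x          */
  }
  return vf;
}

int main(void) {
  for (float x = -6.0f; x <= 6.0f; x += 3.0f) {
    printf("sigmoid(%+.1f) = %.7f\n", x, sigmoid_sketch(x));
  }
  return 0;
}
```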