/external/XNNPACK/src/x32-packx/
D | x4-wasmsimd.c |
    45  const v128_t vx3 = wasm_v128_load(x3);  in xnn_x32_packx_ukernel_4x__wasmsimd() local
    50  const v128_t vt2 = wasm_v32x4_shuffle(vx2, vx3, 0, 4, 1, 5);  in xnn_x32_packx_ukernel_4x__wasmsimd()
    51  const v128_t vt3 = wasm_v32x4_shuffle(vx2, vx3, 2, 6, 3, 7);  in xnn_x32_packx_ukernel_4x__wasmsimd()
    72  const float vx3 = *x3++;  in xnn_x32_packx_ukernel_4x__wasmsimd() local
    76  y[3] = vx3;  in xnn_x32_packx_ukernel_4x__wasmsimd()
|
D | x4-sse.c |
    46  const __m128 vx3 = _mm_loadu_ps(x3);  in xnn_x32_packx_ukernel_4x__sse() local
    51  const __m128 vt2 = _mm_unpacklo_ps(vx2, vx3);  in xnn_x32_packx_ukernel_4x__sse()
    52  const __m128 vt3 = _mm_unpackhi_ps(vx2, vx3);  in xnn_x32_packx_ukernel_4x__sse()
    76  const __m128 vx3 = _mm_load_ss(x3);  in xnn_x32_packx_ukernel_4x__sse() local
    80  const __m128 vx23 = _mm_unpacklo_ps(vx2, vx3);  in xnn_x32_packx_ukernel_4x__sse()
|
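The two x32-packx references above interleave four input rows into a packed panel: the SIMD bodies transpose 4x4 blocks with shuffles/unpacks, while the scalar tail copies one element from each row pointer into y[0..3]. A minimal scalar sketch of that tail pattern follows; the signature is illustrative, not the actual XNNPACK ukernel prototype.

#include <stddef.h>

/* Hypothetical scalar sketch of the 4-row pack's remainder path: one element
 * from each row pointer is interleaved into the packed output, mirroring the
 * "y[3] = vx3" line referenced above. */
static void packx_4x_scalar_sketch(size_t k, const float* x0, const float* x1,
                                   const float* x2, const float* x3, float* y) {
  for (; k != 0; k--) {
    y[0] = *x0++;
    y[1] = *x1++;
    y[2] = *x2++;
    y[3] = *x3++;
    y += 4;
  }
}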
/external/XNNPACK/src/f32-vlrelu/gen/ |
D | vlrelu-scalar-x4.c |
    31  const float vx3 = x[3];  in xnn_f32_vlrelu_ukernel__scalar_x4() local
    37  float vacc3 = vx3 * vslope;  in xnn_f32_vlrelu_ukernel__scalar_x4()
    42  vacc3 = XNN_UNPREDICTABLE(vx3 < 0.0f) ? vacc3 : vx3;  in xnn_f32_vlrelu_ukernel__scalar_x4()
|
D | vlrelu-wasm-x4.c |
    32  const float vx3 = x[3];  in xnn_f32_vlrelu_ukernel__wasm_x4() local
    38  const float vnegx3 = __builtin_wasm_min_f32(vx3, vzero);  in xnn_f32_vlrelu_ukernel__wasm_x4()
    47  const float vposx3 = __builtin_wasm_max_f32(vx3, vzero);  in xnn_f32_vlrelu_ukernel__wasm_x4()
|
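Both LeakyReLU kernels compute the same element-wise function: scale the input by the slope and keep the original value when the input is non-negative. The scalar variant uses a select, while the wasm variant splits the input into min(x, 0) and max(x, 0) halves. A scalar sketch under an assumed, simplified signature:

#include <stddef.h>

/* Scalar sketch of LeakyReLU: y = x for x >= 0, y = x * slope otherwise,
 * matching the select in vlrelu-scalar-x4.c (signature is illustrative). */
static void vlrelu_scalar_sketch(size_t n, const float* x, float* y, float slope) {
  for (size_t i = 0; i < n; i++) {
    const float vx = x[i];
    const float vacc = vx * slope;    /* vacc3 = vx3 * vslope */
    y[i] = (vx < 0.0f) ? vacc : vx;   /* the XNN_UNPREDICTABLE select */
  }
}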
/external/XNNPACK/src/f32-hswish/gen/ |
D | hswish-wasm-x4.c |
    37  float vx3 = x[3];  in xnn_f32_hswish_ukernel__wasm_x4() local
    46  float vacc3 = vx3 + vthree;  in xnn_f32_hswish_ukernel__wasm_x4()
    47  vx3 *= vsixth;  in xnn_f32_hswish_ukernel__wasm_x4()
    62  vacc3 *= vx3;  in xnn_f32_hswish_ukernel__wasm_x4()
|
D | hswish-scalar-x4.c |
    37  float vx3 = x[3];  in xnn_f32_hswish_ukernel__scalar_x4() local
    46  float vacc3 = vx3 + vthree;  in xnn_f32_hswish_ukernel__scalar_x4()
    47  vx3 *= vsixth;  in xnn_f32_hswish_ukernel__scalar_x4()
    62  vacc3 *= vx3;  in xnn_f32_hswish_ukernel__scalar_x4()
|
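The hswish references trace the hard-swish formula y = x * min(max(x + 3, 0), 6) / 6, with the 1/6 factor folded into x before the final multiply. The clamp to [0, 6] is not among the referenced lines but sits between them in the kernel. A scalar sketch with illustrative names (vthree and vsixth stand in for the kernel parameters):

#include <stddef.h>

/* Scalar sketch of hard-swish under an assumed signature:
 * y = x * min(max(x + 3, 0), 6) / 6, with 1/6 folded into x first. */
static void hswish_scalar_sketch(size_t n, const float* x, float* y) {
  const float vthree = 3.0f;
  const float vsixth = 1.0f / 6.0f;
  for (size_t i = 0; i < n; i++) {
    float vx = x[i];
    float vacc = vx + vthree;   /* vacc3 = vx3 + vthree */
    vx *= vsixth;               /* vx3 *= vsixth */
    if (vacc < 0.0f) vacc = 0.0f;
    if (vacc > 6.0f) vacc = 6.0f;
    y[i] = vacc * vx;           /* vacc3 *= vx3 */
  }
}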
/external/XNNPACK/src/f32-vunary/gen/ |
D | vsqr-scalar-x4.c |
    32  const float vx3 = x[3];  in xnn_f32_vsqr_ukernel__scalar_x4() local
    38  const float vy3 = vx3 * vx3;  in xnn_f32_vsqr_ukernel__scalar_x4()
|
D | vneg-scalar-x4.c |
    32  const float vx3 = x[3];  in xnn_f32_vneg_ukernel__scalar_x4() local
    38  const float vy3 = -vx3;  in xnn_f32_vneg_ukernel__scalar_x4()
|
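The two f32-vunary kernels are straightforward element-wise maps: square and negate. Equivalent scalar sketches with assumed signatures:

#include <stddef.h>

/* Scalar sketches of the two unary maps referenced above. */
static void vsqr_scalar_sketch(size_t n, const float* x, float* y) {
  for (size_t i = 0; i < n; i++) {
    const float vx = x[i];
    y[i] = vx * vx;   /* vy3 = vx3 * vx3 */
  }
}

static void vneg_scalar_sketch(size_t n, const float* x, float* y) {
  for (size_t i = 0; i < n; i++) {
    y[i] = -x[i];     /* vy3 = -vx3 */
  }
}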
/external/XNNPACK/src/f32-rmax/ |
D | wasmsimd-x86.c |
    30  const v128_t vx3 = wasm_v128_load(x + 12);  in xnn_f32_rmax_ukernel__wasmsimd_x86() local
    36  const v128_t vlt3 = wasm_f32x4_lt(vx3, vmax3);  in xnn_f32_rmax_ukernel__wasmsimd_x86()
    41  vmax3 = wasm_v128_bitselect(vmax3, vx3, vlt3);  in xnn_f32_rmax_ukernel__wasmsimd_x86()
|
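The f32-rmax references show the x86-flavoured wasmsimd reduction: compare each loaded vector against a running maximum with wasm_f32x4_lt, then keep the larger lane via wasm_v128_bitselect. A scalar sketch of the same select-the-larger logic, with an assumed signature:

#include <stddef.h>
#include <assert.h>

/* Scalar sketch of the running-max reduction that the wasmsimd kernel
 * expresses as wasm_f32x4_lt + wasm_v128_bitselect: keep the accumulator
 * when the new value is smaller, otherwise take the new value. */
static float rmax_scalar_sketch(size_t n, const float* x) {
  assert(n != 0);
  float vmax = x[0];
  for (size_t i = 1; i < n; i++) {
    const float vx = x[i];
    vmax = (vx < vmax) ? vmax : vx;   /* bitselect(vmax, vx, vx < vmax) */
  }
  return vmax;
}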
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | scalar-lut64-p2-x4.c |
    55  const float vx3 = vi3 - vi_max;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4() local
    67  float vn3 = vx3 * vlog2e_x64 + vmagic_bias;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4()
    106  float vt3 = vn3 * vminus_ln2_o64_hi + vx3;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4()
    145  if XNN_UNPREDICTABLE(vx3 < vdenorm_cutoff) {  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4()
|
D | scalar-p5-x4-acc2.c |
    55  const float vx3 = vi3 - vi_max;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2() local
    65  float vn3 = vx3 * vlog2e + vmagic_bias;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2()
    85  float vt3 = vn3 * vminus_ln2_hi + vx3;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2()
    138  if XNN_UNPREDICTABLE(vx3 < vdenorm_cutoff) {  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2()
|
D | scalar-p5-x4.c |
    54  const float vx3 = vi3 - vi_max;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4() local
    64  float vn3 = vx3 * vlog2e + vmagic_bias;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4()
    84  float vt3 = vn3 * vminus_ln2_hi + vx3;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4()
    137  if XNN_UNPREDICTABLE(vx3 < vdenorm_cutoff) {  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4()
|
D | scalar-p5-x4-acc4.c |
    57  const float vx3 = vi3 - vi_max;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc4() local
    67  float vn3 = vx3 * vlog2e + vmagic_bias;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc4()
    87  float vt3 = vn3 * vminus_ln2_hi + vx3;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc4()
    140  if XNN_UNPREDICTABLE(vx3 < vdenorm_cutoff) {  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc4()
|
D | scalar-lut64-p2-x4-acc2.c |
    56  const float vx3 = vi3 - vi_max;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2() local
    68  float vn3 = vx3 * vlog2e_x64 + vmagic_bias;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2()
    107  float vt3 = vn3 * vminus_ln2_o64_hi + vx3;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2()
    146  if XNN_UNPREDICTABLE(vx3 < vdenorm_cutoff) {  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2()
|
D | scalar-lut64-p2-x4-acc4.c |
    58  const float vx3 = vi3 - vi_max;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4() local
    70  float vn3 = vx3 * vlog2e_x64 + vmagic_bias;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4()
    109  float vt3 = vn3 * vminus_ln2_o64_hi + vx3;  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4()
    148  if XNN_UNPREDICTABLE(vx3 < vdenorm_cutoff) {  in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4()
|
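All six raddstoreexpminusmax variants compute exp(x - max) per element, store it, and accumulate the sum of the exponentials (the acc2/acc4 suffixes split that sum across multiple accumulators). The referenced lines are the shared skeleton: subtract the running maximum, scale by log2(e) with a magic-bias round, reconstruct the reduced argument with the high part of -ln(2) (a Cody-Waite style split), and flush results below a denormal cutoff. The sketch below leans on libm's expf instead of the p5 polynomial or the 64-entry table, and its signature is assumed:

#include <stddef.h>
#include <math.h>

/* Reference sketch of the per-element computation: y[i] = exp(x[i] - x_max),
 * with the exponentials also summed (the "radd" part). The real kernels
 * replace expf with the polynomial/table approximation referenced above and
 * flush outputs below the denormal cutoff to zero. */
static float raddstoreexpminusmax_sketch(size_t n, const float* x, float* y, float x_max) {
  float vsum = 0.0f;
  for (size_t i = 0; i < n; i++) {
    const float vx = x[i] - x_max;  /* vx3 = vi3 - vi_max */
    const float vf = expf(vx);
    y[i] = vf;
    vsum += vf;
  }
  return vsum;
}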
/external/XNNPACK/src/f32-vsqrt/gen/ |
D | avx512f-nr1fma1adj-x64.c |
    33  const __m512 vx3 = _mm512_loadu_ps(x + 48);  in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64() local
    39  const __m512 vrsqrtx3 = _mm512_rsqrt14_ps(vx3);  in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64()
    47  __m512 vsqrtx3 = _mm512_mul_ps(vrsqrtx3, vx3);  in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64()
    67  const __m512 vadjustment3 = _mm512_fnmadd_ps(vsqrtx3, vsqrtx3, vx3);  in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64()
|
D | scalar-sqrt-x4.c |
    30  const float vx3 = x[3];  in xnn_f32_vsqrt_ukernel__scalar_sqrt_x4() local
    36  const float vy3 = sqrtf(vx3);  in xnn_f32_vsqrt_ukernel__scalar_sqrt_x4()
|
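The f32-vsqrt pair contrasts two strategies: the scalar kernel calls sqrtf directly, while the AVX-512 kernel starts from the approximate _mm512_rsqrt14_ps estimate, forms sqrt(x) ~= rsqrt(x) * x, and refines it with one Newton-Raphson step plus a final fused adjustment (the fnmadd computing x - sqrtx * sqrtx in the last reference). A scalar sketch of the libm path, with an assumed signature:

#include <stddef.h>
#include <math.h>

/* Scalar sketch matching the scalar-sqrt-x4.c references: one sqrtf per element. */
static void vsqrt_scalar_sketch(size_t n, const float* x, float* y) {
  for (size_t i = 0; i < n; i++) {
    y[i] = sqrtf(x[i]);   /* vy3 = sqrtf(vx3) */
  }
}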
/external/XNNPACK/src/f32-velu/gen/ |
D | velu-avx2-rr1-lut4-p4-perm-x32.c |
    47  __m256 vx3 = _mm256_loadu_ps(x + 24);  in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x32() local
    53  const __m256 vz3 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx3, vprescale));  in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x32()
    117  vx3 = _mm256_mul_ps(vx3, vbeta);  in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x32()
    122  const __m256 vy3 = _mm256_blendv_ps(vx3, ve3, vx3);  in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x32()
|
D | velu-avx2-rr1-lut16-p3-gather-x32.c |
    46  __m256 vx3 = _mm256_loadu_ps(x + 24);  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32() local
    52  const __m256 vz3 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx3, vprescale));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32()
    116  vx3 = _mm256_mul_ps(vx3, vbeta);  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32()
    121  const __m256 vy3 = _mm256_blendv_ps(vx3, ve3, vx3);  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32()
|
D | velu-avx2-rr1-lut8-p4-perm-x32.c |
    46  __m256 vx3 = _mm256_loadu_ps(x + 24);  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32() local
    52  const __m256 vz3 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx3, vprescale));  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32()
    116  vx3 = _mm256_mul_ps(vx3, vbeta);  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32()
    121  const __m256 vy3 = _mm256_blendv_ps(vx3, ve3, vx3);  in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32()
|
D | velu-avx2-rr1-p6-x32.c |
    46  __m256 vx3 = _mm256_loadu_ps(x + 24);  in xnn_f32_velu_ukernel__avx2_rr1_p6_x32() local
    52  const __m256 vz3 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx3, vprescale));  in xnn_f32_velu_ukernel__avx2_rr1_p6_x32()
    118  vx3 = _mm256_mul_ps(vx3, vbeta);  in xnn_f32_velu_ukernel__avx2_rr1_p6_x32()
    123  const __m256 vy3 = _mm256_blendv_ps(vx3, ve3, vx3);  in xnn_f32_velu_ukernel__avx2_rr1_p6_x32()
|
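The four velu variants differ only in how they approximate the exponential (4/8/16-entry LUTs or a degree-6 polynomial); the referenced lines are the common frame: pre-scale and saturate the input, scale the positive branch by beta, and blend on the sign of x (the _mm256_blendv_ps mask is vx itself, so its sign bit selects the ELU branch). The sketch below substitutes libm's expm1f for the rr1 approximation; prescale, alpha, and beta mirror the ukernel parameters, but the signature is assumed:

#include <stddef.h>
#include <math.h>

/* Scalar sketch of the ELU these kernels implement: negative inputs go
 * through alpha * expm1(prescale * x), non-negative inputs are scaled by
 * beta. The real kernels also saturate prescale * x from below
 * (vsat_cutoff) before the exponential approximation. */
static void velu_scalar_sketch(size_t n, const float* x, float* y,
                               float prescale, float alpha, float beta) {
  for (size_t i = 0; i < n; i++) {
    const float vx = x[i];
    const float ve = alpha * expm1f(vx * prescale);  /* negative branch */
    const float vp = vx * beta;                      /* vx3 = vx3 * vbeta */
    y[i] = (vx < 0.0f) ? ve : vp;  /* blendv on the sign of vx */
  }
}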
/external/XNNPACK/src/f32-vrnd/gen/ |
D | vrndz-scalar-libm-x4.c |
    31  const float vx3 = x[3];  in xnn_f32_vrndz_ukernel__scalar_libm_x4() local
    37  const float vy3 = truncf(vx3);  in xnn_f32_vrndz_ukernel__scalar_libm_x4()
|
D | vrndd-scalar-libm-x4.c |
    31  const float vx3 = x[3];  in xnn_f32_vrndd_ukernel__scalar_libm_x4() local
    37  const float vy3 = floorf(vx3);  in xnn_f32_vrndd_ukernel__scalar_libm_x4()
|
D | vrndu-scalar-libm-x4.c |
    31  const float vx3 = x[3];  in xnn_f32_vrndu_ukernel__scalar_libm_x4() local
    37  const float vy3 = ceilf(vx3);  in xnn_f32_vrndu_ukernel__scalar_libm_x4()
|
D | vrndne-scalar-libm-x4.c |
    31  const float vx3 = x[3];  in xnn_f32_vrndne_ukernel__scalar_libm_x4() local
    37  const float vy3 = nearbyintf(vx3);  in xnn_f32_vrndne_ukernel__scalar_libm_x4()
|
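The four f32-vrnd scalar-libm kernels differ only in which libm rounding function they apply per element. A compact sketch of all four directions, under assumed signatures:

#include <stddef.h>
#include <math.h>

/* Scalar sketches of the four rounding directions: toward zero, down, up,
 * and to-nearest under the current rounding mode (nearest-even by default). */
static void vrndz_scalar_sketch(size_t n, const float* x, float* y) {
  for (size_t i = 0; i < n; i++) y[i] = truncf(x[i]);
}
static void vrndd_scalar_sketch(size_t n, const float* x, float* y) {
  for (size_t i = 0; i < n; i++) y[i] = floorf(x[i]);
}
static void vrndu_scalar_sketch(size_t n, const float* x, float* y) {
  for (size_t i = 0; i < n; i++) y[i] = ceilf(x[i]);
}
static void vrndne_scalar_sketch(size_t n, const float* x, float* y) {
  for (size_t i = 0; i < n; i++) y[i] = nearbyintf(x[i]);
}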