Home
last modified time | relevance | path

Searched refs:vprescale (Results 1 – 25 of 187) sorted by relevance

12345678

/external/XNNPACK/src/f32-velu/gen/
Dvelu-wasm-rr2-p6-x6.c27 const float vprescale = params->scalar.prescale; in xnn_f32_velu_ukernel__wasm_rr2_p6_x6() local
52 …const float vz0 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx0 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x6()
53 …const float vz1 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx1 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x6()
54 …const float vz2 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx2 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x6()
55 …const float vz3 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx3 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x6()
56 …const float vz4 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx4 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x6()
57 …const float vz5 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx5 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x6()
181 …const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f); in xnn_f32_velu_ukernel__wasm_rr2_p6_x6()
Dvelu-scalar-rr2-lut16-p3-x6.c29 const float vprescale = params->scalar.prescale; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6() local
52 const float vz0 = vx0 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6()
53 const float vz1 = vx1 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6()
54 const float vz2 = vx2 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6()
55 const float vz3 = vx3 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6()
56 const float vz4 = vx4 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6()
57 const float vz5 = vx5 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6()
207 const float vz = vx * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6()
Dvelu-wasm-rr2-lut16-p3-x6.c29 const float vprescale = params->scalar.prescale; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6() local
52 …const float vz0 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx0 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6()
53 …const float vz1 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx1 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6()
54 …const float vz2 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx2 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6()
55 …const float vz3 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx3 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6()
56 …const float vz4 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx4 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6()
57 …const float vz5 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx5 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6()
171 …const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f); in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6()
Dvelu-wasm-rr2-p6-x5.c27 const float vprescale = params->scalar.prescale; in xnn_f32_velu_ukernel__wasm_rr2_p6_x5() local
51 …const float vz0 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx0 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x5()
52 …const float vz1 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx1 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x5()
53 …const float vz2 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx2 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x5()
54 …const float vz3 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx3 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x5()
55 …const float vz4 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx4 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x5()
162 …const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f); in xnn_f32_velu_ukernel__wasm_rr2_p6_x5()
Dvelu-scalar-rr2-lut16-p3-x5.c29 const float vprescale = params->scalar.prescale; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5() local
51 const float vz0 = vx0 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5()
52 const float vz1 = vx1 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5()
53 const float vz2 = vx2 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5()
54 const float vz3 = vx3 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5()
55 const float vz4 = vx4 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5()
183 const float vz = vx * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5()
Dvelu-wasm-rr2-lut16-p3-x5.c29 const float vprescale = params->scalar.prescale; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5() local
51 …const float vz0 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx0 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5()
52 …const float vz1 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx1 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5()
53 …const float vz2 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx2 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5()
54 …const float vz3 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx3 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5()
55 …const float vz4 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx4 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5()
153 …const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f); in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5()
Dvelu-scalar-rr2-p6-x6.c27 const float vprescale = params->scalar.prescale; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6() local
52 const float vz0 = vx0 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
53 const float vz1 = vx1 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
54 const float vz2 = vx2 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
55 const float vz3 = vx3 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
56 const float vz4 = vx4 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
57 const float vz5 = vx5 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
217 const float vz = vx * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
Dvelu-scalar-rr2-lut16-p3-x4.c29 const float vprescale = params->scalar.prescale; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4() local
50 const float vz0 = vx0 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4()
51 const float vz1 = vx1 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4()
52 const float vz2 = vx2 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4()
53 const float vz3 = vx3 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4()
159 const float vz = vx * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4()
Dvelu-wasm-rr2-p6-x4.c27 const float vprescale = params->scalar.prescale; in xnn_f32_velu_ukernel__wasm_rr2_p6_x4() local
50 …const float vz0 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx0 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x4()
51 …const float vz1 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx1 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x4()
52 …const float vz2 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx2 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x4()
53 …const float vz3 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx3 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x4()
143 …const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f); in xnn_f32_velu_ukernel__wasm_rr2_p6_x4()
Dvelu-wasm-rr2-lut16-p3-x4.c29 const float vprescale = params->scalar.prescale; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4() local
50 …const float vz0 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx0 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4()
51 …const float vz1 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx1 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4()
52 …const float vz2 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx2 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4()
53 …const float vz3 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx3 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4()
135 …const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f); in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4()
Dvelu-scalar-rr2-p6-x5.c27 const float vprescale = params->scalar.prescale; in xnn_f32_velu_ukernel__scalar_rr2_p6_x5() local
51 const float vz0 = vx0 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_p6_x5()
52 const float vz1 = vx1 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_p6_x5()
53 const float vz2 = vx2 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_p6_x5()
54 const float vz3 = vx3 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_p6_x5()
55 const float vz4 = vx4 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_p6_x5()
192 const float vz = vx * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_p6_x5()
Dvelu-scalar-rr2-p6-x4.c27 const float vprescale = params->scalar.prescale; in xnn_f32_velu_ukernel__scalar_rr2_p6_x4() local
50 const float vz0 = vx0 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_p6_x4()
51 const float vz1 = vx1 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_p6_x4()
52 const float vz2 = vx2 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_p6_x4()
53 const float vz3 = vx3 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_p6_x4()
167 const float vz = vx * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_p6_x4()
Dvelu-scalar-rr2-p6-x3.c27 const float vprescale = params->scalar.prescale; in xnn_f32_velu_ukernel__scalar_rr2_p6_x3() local
49 const float vz0 = vx0 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_p6_x3()
50 const float vz1 = vx1 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_p6_x3()
51 const float vz2 = vx2 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_p6_x3()
142 const float vz = vx * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_p6_x3()
Dvelu-wasm-rr2-p6-x3.c27 const float vprescale = params->scalar.prescale; in xnn_f32_velu_ukernel__wasm_rr2_p6_x3() local
49 …const float vz0 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx0 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x3()
50 …const float vz1 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx1 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x3()
51 …const float vz2 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx2 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_p6_x3()
124 …const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f); in xnn_f32_velu_ukernel__wasm_rr2_p6_x3()
Dvelu-wasm-rr2-lut16-p3-x3.c29 const float vprescale = params->scalar.prescale; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3() local
49 …const float vz0 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx0 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3()
50 …const float vz1 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx1 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3()
51 …const float vz2 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx2 * vprescale, vsat_cutoff), 0.0… in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3()
117 …const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f); in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3()
Dvelu-scalar-rr2-lut16-p3-x3.c29 const float vprescale = params->scalar.prescale; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3() local
49 const float vz0 = vx0 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3()
50 const float vz1 = vx1 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3()
51 const float vz2 = vx2 * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3()
135 const float vz = vx * vprescale; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3()
Dvelu-avx512f-rr1-lut16-p3-perm-x128.c28 const __m512 vprescale = _mm512_broadcast_f32x4(_mm_load_ps(params->sse.prescale)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128() local
53 const __m512 vz0 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx0, vprescale)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128()
54 const __m512 vz1 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx1, vprescale)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128()
55 const __m512 vz2 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx2, vprescale)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128()
56 const __m512 vz3 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx3, vprescale)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128()
57 const __m512 vz4 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx4, vprescale)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128()
58 const __m512 vz5 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx5, vprescale)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128()
59 const __m512 vz6 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx6, vprescale)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128()
60 const __m512 vz7 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx7, vprescale)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128()
199 const __m512 vz = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx, vprescale)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128()
[all …]
Dvelu-avx2-rr1-p6-x72.c28 const __m256 vprescale = _mm256_broadcast_ps((const __m128*) params->sse.prescale); in xnn_f32_velu_ukernel__avx2_rr1_p6_x72() local
54 const __m256 vz0 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx0, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_p6_x72()
55 const __m256 vz1 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx1, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_p6_x72()
56 const __m256 vz2 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx2, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_p6_x72()
57 const __m256 vz3 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx3, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_p6_x72()
58 const __m256 vz4 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx4, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_p6_x72()
59 const __m256 vz5 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx5, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_p6_x72()
60 const __m256 vz6 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx6, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_p6_x72()
61 const __m256 vz7 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx7, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_p6_x72()
62 const __m256 vz8 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx8, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_p6_x72()
[all …]
Dvelu-avx2-rr1-lut4-p4-perm-x72.c28 const __m256 vprescale = _mm256_broadcast_ps((const __m128*) params->sse.prescale); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72() local
55 const __m256 vz0 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx0, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72()
56 const __m256 vz1 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx1, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72()
57 const __m256 vz2 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx2, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72()
58 const __m256 vz3 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx3, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72()
59 const __m256 vz4 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx4, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72()
60 const __m256 vz5 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx5, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72()
61 const __m256 vz6 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx6, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72()
62 const __m256 vz7 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx7, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72()
63 const __m256 vz8 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx8, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72()
[all …]
Dvelu-avx2-rr1-lut16-p3-gather-x72.c30 const __m256 vprescale = _mm256_broadcast_ps((const __m128*) params->sse.prescale); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72() local
54 const __m256 vz0 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx0, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
55 const __m256 vz1 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx1, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
56 const __m256 vz2 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx2, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
57 const __m256 vz3 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx3, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
58 const __m256 vz4 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx4, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
59 const __m256 vz5 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx5, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
60 const __m256 vz6 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx6, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
61 const __m256 vz7 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx7, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
62 const __m256 vz8 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx8, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
[all …]
Dvelu-avx2-rr1-lut8-p4-perm-x72.c28 const __m256 vprescale = _mm256_broadcast_ps((const __m128*) params->sse.prescale); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72() local
54 const __m256 vz0 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx0, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
55 const __m256 vz1 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx1, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
56 const __m256 vz2 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx2, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
57 const __m256 vz3 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx3, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
58 const __m256 vz4 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx4, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
59 const __m256 vz5 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx5, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
60 const __m256 vz6 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx6, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
61 const __m256 vz7 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx7, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
62 const __m256 vz8 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx8, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
[all …]
Dvelu-avx2-rr1-lut8-p4-perm-x80.c28 const __m256 vprescale = _mm256_broadcast_ps((const __m128*) params->sse.prescale); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80() local
55 const __m256 vz0 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx0, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80()
56 const __m256 vz1 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx1, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80()
57 const __m256 vz2 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx2, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80()
58 const __m256 vz3 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx3, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80()
59 const __m256 vz4 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx4, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80()
60 const __m256 vz5 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx5, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80()
61 const __m256 vz6 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx6, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80()
62 const __m256 vz7 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx7, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80()
63 const __m256 vz8 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx8, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80()
[all …]
Dvelu-avx2-rr1-p6-x80.c28 const __m256 vprescale = _mm256_broadcast_ps((const __m128*) params->sse.prescale); in xnn_f32_velu_ukernel__avx2_rr1_p6_x80() local
55 const __m256 vz0 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx0, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_p6_x80()
56 const __m256 vz1 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx1, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_p6_x80()
57 const __m256 vz2 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx2, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_p6_x80()
58 const __m256 vz3 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx3, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_p6_x80()
59 const __m256 vz4 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx4, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_p6_x80()
60 const __m256 vz5 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx5, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_p6_x80()
61 const __m256 vz6 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx6, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_p6_x80()
62 const __m256 vz7 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx7, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_p6_x80()
63 const __m256 vz8 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx8, vprescale)); in xnn_f32_velu_ukernel__avx2_rr1_p6_x80()
[all …]
/external/XNNPACK/src/f32-velu/
Dscalar-rr2-lut16-p3.c.in26 const float vprescale = params->scalar.prescale;
48 …const float vz${N} = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx${N} * vprescale, vsat_cutoff…
50 const float vz${N} = vx${N} * vprescale;
110 …const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f);
112 const float vz = vx * vprescale;
155 …const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f);
157 const float vz = vx * vprescale;
199 …const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f);
201 const float vz = vx * vprescale;
Dscalar-rr2-p6.c.in24 const float vprescale = params->scalar.prescale;
48 …const float vz${N} = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx${N} * vprescale, vsat_cutoff…
50 const float vz${N} = vx${N} * vprescale;
119 …const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f);
121 const float vz = vx * vprescale;
165 …const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f);
167 const float vz = vx * vprescale;
210 …const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f);
212 const float vz = vx * vprescale;

12345678