/external/XNNPACK/src/f32-velu/gen/ |
D | velu-wasm-rr2-p6-x6.c | all matches in xnn_f32_velu_ukernel__wasm_rr2_p6_x6()
     27  const float vprescale = params->scalar.prescale;  (local)
     52  const float vz0 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx0 * vprescale, vsat_cutoff), 0.0f);
     53  const float vz1 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx1 * vprescale, vsat_cutoff), 0.0f);
     54  const float vz2 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx2 * vprescale, vsat_cutoff), 0.0f);
     55  const float vz3 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx3 * vprescale, vsat_cutoff), 0.0f);
     56  const float vz4 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx4 * vprescale, vsat_cutoff), 0.0f);
     57  const float vz5 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx5 * vprescale, vsat_cutoff), 0.0f);
    181  const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f);
|
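Context for the wasm entries in this listing: the __builtin_wasm_min_f32/__builtin_wasm_max_f32 pair clamps the prescaled input into [vsat_cutoff, 0] in a single branch-free step, so the exp approximation further down in these kernels never sees out-of-range inputs. A minimal portable sketch of that clamp, with fminf/fmaxf standing in for the wasm builtins and an assumed cutoff constant (vsat_cutoff is defined in each kernel outside the matched lines):

    #include <math.h>

    /* Sketch of the per-element clamp computed by the wasm kernels above.
       fminf/fmaxf stand in for __builtin_wasm_min_f32/__builtin_wasm_max_f32;
       the cutoff is an assumed illustrative value: inputs at or below it
       already saturate the ELU exponential, so clamping loses nothing. */
    static inline float elu_prescale_clamp(float vx, float vprescale) {
      const float vsat_cutoff = -0x1.154246p+4f;  /* assumed, about -17.33 */
      return fminf(fmaxf(vx * vprescale, vsat_cutoff), 0.0f);
    }
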
D | velu-scalar-rr2-lut16-p3-x6.c | all matches in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6()
     29  const float vprescale = params->scalar.prescale;  (local)
     52  const float vz0 = vx0 * vprescale;
     53  const float vz1 = vx1 * vprescale;
     54  const float vz2 = vx2 * vprescale;
     55  const float vz3 = vx3 * vprescale;
     56  const float vz4 = vx4 * vprescale;
     57  const float vz5 = vx5 * vprescale;
    207  const float vz = vx * vprescale;
|
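By contrast, the plain scalar kernels only multiply by vprescale at the matched lines; the saturation is presumably applied later in those files, outside what this search matched. For illustration only, a branchy scalar equivalent of the same clamp (same assumed constant as above):

    /* Illustrative scalar equivalent of the wasm clamp; the real scalar
       kernels apply saturation further down, in lines not matched here. */
    static inline float elu_prescale_scalar(float vx, float vprescale) {
      const float vsat_cutoff = -0x1.154246p+4f;  /* assumed value */
      float vz = vx * vprescale;
      if (vz < vsat_cutoff) { vz = vsat_cutoff; }
      if (vz > 0.0f) { vz = 0.0f; }
      return vz;
    }
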
D | velu-wasm-rr2-lut16-p3-x6.c | all matches in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6()
     29  const float vprescale = params->scalar.prescale;  (local)
     52  const float vz0 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx0 * vprescale, vsat_cutoff), 0.0f);
     53  const float vz1 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx1 * vprescale, vsat_cutoff), 0.0f);
     54  const float vz2 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx2 * vprescale, vsat_cutoff), 0.0f);
     55  const float vz3 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx3 * vprescale, vsat_cutoff), 0.0f);
     56  const float vz4 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx4 * vprescale, vsat_cutoff), 0.0f);
     57  const float vz5 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx5 * vprescale, vsat_cutoff), 0.0f);
    171  const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f);
|
D | velu-wasm-rr2-p6-x5.c | all matches in xnn_f32_velu_ukernel__wasm_rr2_p6_x5()
     27  const float vprescale = params->scalar.prescale;  (local)
     51  const float vz0 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx0 * vprescale, vsat_cutoff), 0.0f);
     52  const float vz1 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx1 * vprescale, vsat_cutoff), 0.0f);
     53  const float vz2 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx2 * vprescale, vsat_cutoff), 0.0f);
     54  const float vz3 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx3 * vprescale, vsat_cutoff), 0.0f);
     55  const float vz4 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx4 * vprescale, vsat_cutoff), 0.0f);
    162  const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f);
|
D | velu-scalar-rr2-lut16-p3-x5.c | all matches in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5()
     29  const float vprescale = params->scalar.prescale;  (local)
     51  const float vz0 = vx0 * vprescale;
     52  const float vz1 = vx1 * vprescale;
     53  const float vz2 = vx2 * vprescale;
     54  const float vz3 = vx3 * vprescale;
     55  const float vz4 = vx4 * vprescale;
    183  const float vz = vx * vprescale;
|
D | velu-wasm-rr2-lut16-p3-x5.c | all matches in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5()
     29  const float vprescale = params->scalar.prescale;  (local)
     51  const float vz0 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx0 * vprescale, vsat_cutoff), 0.0f);
     52  const float vz1 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx1 * vprescale, vsat_cutoff), 0.0f);
     53  const float vz2 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx2 * vprescale, vsat_cutoff), 0.0f);
     54  const float vz3 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx3 * vprescale, vsat_cutoff), 0.0f);
     55  const float vz4 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx4 * vprescale, vsat_cutoff), 0.0f);
    153  const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f);
|
D | velu-scalar-rr2-p6-x6.c | all matches in xnn_f32_velu_ukernel__scalar_rr2_p6_x6()
     27  const float vprescale = params->scalar.prescale;  (local)
     52  const float vz0 = vx0 * vprescale;
     53  const float vz1 = vx1 * vprescale;
     54  const float vz2 = vx2 * vprescale;
     55  const float vz3 = vx3 * vprescale;
     56  const float vz4 = vx4 * vprescale;
     57  const float vz5 = vx5 * vprescale;
    217  const float vz = vx * vprescale;
|
D | velu-scalar-rr2-lut16-p3-x4.c | all matches in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4()
     29  const float vprescale = params->scalar.prescale;  (local)
     50  const float vz0 = vx0 * vprescale;
     51  const float vz1 = vx1 * vprescale;
     52  const float vz2 = vx2 * vprescale;
     53  const float vz3 = vx3 * vprescale;
    159  const float vz = vx * vprescale;
|
D | velu-wasm-rr2-p6-x4.c | all matches in xnn_f32_velu_ukernel__wasm_rr2_p6_x4()
     27  const float vprescale = params->scalar.prescale;  (local)
     50  const float vz0 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx0 * vprescale, vsat_cutoff), 0.0f);
     51  const float vz1 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx1 * vprescale, vsat_cutoff), 0.0f);
     52  const float vz2 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx2 * vprescale, vsat_cutoff), 0.0f);
     53  const float vz3 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx3 * vprescale, vsat_cutoff), 0.0f);
    143  const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f);
|
D | velu-wasm-rr2-lut16-p3-x4.c | all matches in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4()
     29  const float vprescale = params->scalar.prescale;  (local)
     50  const float vz0 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx0 * vprescale, vsat_cutoff), 0.0f);
     51  const float vz1 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx1 * vprescale, vsat_cutoff), 0.0f);
     52  const float vz2 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx2 * vprescale, vsat_cutoff), 0.0f);
     53  const float vz3 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx3 * vprescale, vsat_cutoff), 0.0f);
    135  const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f);
|
D | velu-scalar-rr2-p6-x5.c | all matches in xnn_f32_velu_ukernel__scalar_rr2_p6_x5()
     27  const float vprescale = params->scalar.prescale;  (local)
     51  const float vz0 = vx0 * vprescale;
     52  const float vz1 = vx1 * vprescale;
     53  const float vz2 = vx2 * vprescale;
     54  const float vz3 = vx3 * vprescale;
     55  const float vz4 = vx4 * vprescale;
    192  const float vz = vx * vprescale;
|
D | velu-scalar-rr2-p6-x4.c | all matches in xnn_f32_velu_ukernel__scalar_rr2_p6_x4()
     27  const float vprescale = params->scalar.prescale;  (local)
     50  const float vz0 = vx0 * vprescale;
     51  const float vz1 = vx1 * vprescale;
     52  const float vz2 = vx2 * vprescale;
     53  const float vz3 = vx3 * vprescale;
    167  const float vz = vx * vprescale;
|
D | velu-scalar-rr2-p6-x3.c | all matches in xnn_f32_velu_ukernel__scalar_rr2_p6_x3()
     27  const float vprescale = params->scalar.prescale;  (local)
     49  const float vz0 = vx0 * vprescale;
     50  const float vz1 = vx1 * vprescale;
     51  const float vz2 = vx2 * vprescale;
    142  const float vz = vx * vprescale;
|
D | velu-wasm-rr2-p6-x3.c | all matches in xnn_f32_velu_ukernel__wasm_rr2_p6_x3()
     27  const float vprescale = params->scalar.prescale;  (local)
     49  const float vz0 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx0 * vprescale, vsat_cutoff), 0.0f);
     50  const float vz1 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx1 * vprescale, vsat_cutoff), 0.0f);
     51  const float vz2 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx2 * vprescale, vsat_cutoff), 0.0f);
    124  const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f);
|
D | velu-wasm-rr2-lut16-p3-x3.c | all matches in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3()
     29  const float vprescale = params->scalar.prescale;  (local)
     49  const float vz0 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx0 * vprescale, vsat_cutoff), 0.0f);
     50  const float vz1 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx1 * vprescale, vsat_cutoff), 0.0f);
     51  const float vz2 = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx2 * vprescale, vsat_cutoff), 0.0f);
    117  const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f);
|
D | velu-scalar-rr2-lut16-p3-x3.c | all matches in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3()
     29  const float vprescale = params->scalar.prescale;  (local)
     49  const float vz0 = vx0 * vprescale;
     50  const float vz1 = vx1 * vprescale;
     51  const float vz2 = vx2 * vprescale;
    135  const float vz = vx * vprescale;
|
D | velu-avx512f-rr1-lut16-p3-perm-x128.c | all matches in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128()
     28  const __m512 vprescale = _mm512_broadcast_f32x4(_mm_load_ps(params->sse.prescale));  (local)
     53  const __m512 vz0 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx0, vprescale));
     54  const __m512 vz1 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx1, vprescale));
     55  const __m512 vz2 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx2, vprescale));
     56  const __m512 vz3 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx3, vprescale));
     57  const __m512 vz4 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx4, vprescale));
     58  const __m512 vz5 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx5, vprescale));
     59  const __m512 vz6 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx6, vprescale));
     60  const __m512 vz7 = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx7, vprescale));
    199  const __m512 vz = _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx, vprescale));
    [all …]
|
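Note the AVX-512 kernel clamps only the lower bound here: vz = max(vsat_cutoff, vx * vprescale), with no min against zero in the matched lines, presumably because non-negative inputs are resolved through a separate linear path later in the kernel. A sketch under that assumption:

    #include <immintrin.h>

    /* One-sided saturation matching the AVX-512 entry above; the cutoff
       constant is assumed for illustration. */
    static inline __m512 elu_prescale_avx512(__m512 vx, __m512 vprescale) {
      const __m512 vsat_cutoff = _mm512_set1_ps(-0x1.154246p+4f);  /* assumed */
      return _mm512_max_ps(vsat_cutoff, _mm512_mul_ps(vx, vprescale));
    }
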
D | velu-avx2-rr1-p6-x72.c | all matches in xnn_f32_velu_ukernel__avx2_rr1_p6_x72()
     28  const __m256 vprescale = _mm256_broadcast_ps((const __m128*) params->sse.prescale);  (local)
     54  const __m256 vz0 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx0, vprescale));
     55  const __m256 vz1 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx1, vprescale));
     56  const __m256 vz2 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx2, vprescale));
     57  const __m256 vz3 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx3, vprescale));
     58  const __m256 vz4 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx4, vprescale));
     59  const __m256 vz5 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx5, vprescale));
     60  const __m256 vz6 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx6, vprescale));
     61  const __m256 vz7 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx7, vprescale));
     62  const __m256 vz8 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx8, vprescale));
    [all …]
|
D | velu-avx2-rr1-lut4-p4-perm-x72.c | all matches in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72()
     28  const __m256 vprescale = _mm256_broadcast_ps((const __m128*) params->sse.prescale);  (local)
     55  const __m256 vz0 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx0, vprescale));
     56  const __m256 vz1 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx1, vprescale));
     57  const __m256 vz2 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx2, vprescale));
     58  const __m256 vz3 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx3, vprescale));
     59  const __m256 vz4 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx4, vprescale));
     60  const __m256 vz5 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx5, vprescale));
     61  const __m256 vz6 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx6, vprescale));
     62  const __m256 vz7 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx7, vprescale));
     63  const __m256 vz8 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx8, vprescale));
    [all …]
|
D | velu-avx2-rr1-lut16-p3-gather-x72.c | all matches in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
     30  const __m256 vprescale = _mm256_broadcast_ps((const __m128*) params->sse.prescale);  (local)
     54  const __m256 vz0 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx0, vprescale));
     55  const __m256 vz1 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx1, vprescale));
     56  const __m256 vz2 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx2, vprescale));
     57  const __m256 vz3 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx3, vprescale));
     58  const __m256 vz4 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx4, vprescale));
     59  const __m256 vz5 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx5, vprescale));
     60  const __m256 vz6 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx6, vprescale));
     61  const __m256 vz7 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx7, vprescale));
     62  const __m256 vz8 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx8, vprescale));
    [all …]
|
D | velu-avx2-rr1-lut8-p4-perm-x72.c | all matches in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72()
     28  const __m256 vprescale = _mm256_broadcast_ps((const __m128*) params->sse.prescale);  (local)
     54  const __m256 vz0 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx0, vprescale));
     55  const __m256 vz1 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx1, vprescale));
     56  const __m256 vz2 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx2, vprescale));
     57  const __m256 vz3 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx3, vprescale));
     58  const __m256 vz4 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx4, vprescale));
     59  const __m256 vz5 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx5, vprescale));
     60  const __m256 vz6 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx6, vprescale));
     61  const __m256 vz7 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx7, vprescale));
     62  const __m256 vz8 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx8, vprescale));
    [all …]
|
D | velu-avx2-rr1-lut8-p4-perm-x80.c | all matches in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80()
     28  const __m256 vprescale = _mm256_broadcast_ps((const __m128*) params->sse.prescale);  (local)
     55  const __m256 vz0 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx0, vprescale));
     56  const __m256 vz1 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx1, vprescale));
     57  const __m256 vz2 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx2, vprescale));
     58  const __m256 vz3 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx3, vprescale));
     59  const __m256 vz4 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx4, vprescale));
     60  const __m256 vz5 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx5, vprescale));
     61  const __m256 vz6 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx6, vprescale));
     62  const __m256 vz7 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx7, vprescale));
     63  const __m256 vz8 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx8, vprescale));
    [all …]
|
D | velu-avx2-rr1-p6-x80.c | all matches in xnn_f32_velu_ukernel__avx2_rr1_p6_x80()
     28  const __m256 vprescale = _mm256_broadcast_ps((const __m128*) params->sse.prescale);  (local)
     55  const __m256 vz0 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx0, vprescale));
     56  const __m256 vz1 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx1, vprescale));
     57  const __m256 vz2 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx2, vprescale));
     58  const __m256 vz3 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx3, vprescale));
     59  const __m256 vz4 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx4, vprescale));
     60  const __m256 vz5 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx5, vprescale));
     61  const __m256 vz6 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx6, vprescale));
     62  const __m256 vz7 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx7, vprescale));
     63  const __m256 vz8 = _mm256_max_ps(vsat_cutoff, _mm256_mul_ps(vx8, vprescale));
    [all …]
|
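All six AVX2 entries materialize vprescale the same way (line 28, or 30 in the gather variant): params->sse.prescale holds four replicated floats, loaded as an __m128 and broadcast to both 128-bit halves of an __m256, and the per-element clamp is the same one-sided _mm256_max_ps as in the AVX-512 kernel. A sketch using a hypothetical stand-in for the params layout:

    #include <immintrin.h>

    /* Hypothetical mirror of the prescale field of XNNPACK's params->sse;
       the real struct carries more fields (alpha, beta, polynomial terms). */
    struct elu_sse_params {
      float prescale[4];  /* four copies of the same scale factor */
    };

    /* Same broadcast pattern as line 28 of the AVX2 entries above. */
    static inline __m256 load_prescale_avx2(const struct elu_sse_params* p) {
      return _mm256_broadcast_ps((const __m128*) p->prescale);
    }
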
/external/XNNPACK/src/f32-velu/ |
D | scalar-rr2-lut16-p3.c.in |
     26  const float vprescale = params->scalar.prescale;
     48  const float vz${N} = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx${N} * vprescale, vsat_cutoff), 0.0f);
     50  const float vz${N} = vx${N} * vprescale;
    110  const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f);
    112  const float vz = vx * vprescale;
    155  const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f);
    157  const float vz = vx * vprescale;
    199  const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f);
    201  const float vz = vx * vprescale;
|
D | scalar-rr2-p6.c.in |
     24  const float vprescale = params->scalar.prescale;
     48  const float vz${N} = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx${N} * vprescale, vsat_cutoff), 0.0f);
     50  const float vz${N} = vx${N} * vprescale;
    119  const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f);
    121  const float vz = vx * vprescale;
    165  const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f);
    167  const float vz = vx * vprescale;
    210  const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f);
    212  const float vz = vx * vprescale;
|
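The two .c.in files above are the templates from which the generated kernels under gen/ are produced: the vz${N} line expands once per unrolled element, and the paired alternatives (lines 48/50, 110/112, and so on) are the wasm and plain-scalar branches of the template. For example, expanding the scalar branch for a 3x unroll reproduces velu-scalar-rr2-p6-x3.c lines 49-51 from the listing above:

    /* Expansion of line 50 of scalar-rr2-p6.c.in for N = 0, 1, 2;
       vx0..vx2 and vprescale are loaded earlier in the generated kernel. */
    const float vz0 = vx0 * vprescale;
    const float vz1 = vx1 * vprescale;
    const float vz2 = vx2 * vprescale;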