Home
last modified time | relevance | path

Searched refs:vsixth (Results 1 – 25 of 26) sorted by relevance

12

/external/XNNPACK/src/f32-hswish/gen/
Dscalar-x4.c26 const float vsixth = params->scalar.sixth; in xnn_f32_hswish_ukernel__scalar_x4() local
39 float vacc0 = vx0 * vsixth + vhalf; in xnn_f32_hswish_ukernel__scalar_x4()
40 float vacc1 = vx1 * vsixth + vhalf; in xnn_f32_hswish_ukernel__scalar_x4()
41 float vacc2 = vx2 * vsixth + vhalf; in xnn_f32_hswish_ukernel__scalar_x4()
42 float vacc3 = vx3 * vsixth + vhalf; in xnn_f32_hswish_ukernel__scalar_x4()
68 float vacc = vx * vsixth + vhalf; in xnn_f32_hswish_ukernel__scalar_x4()
Dwasm-x4.c26 const float vsixth = params->scalar.sixth; in xnn_f32_hswish_ukernel__wasm_x4() local
39 float vacc0 = vx0 * vsixth + vhalf; in xnn_f32_hswish_ukernel__wasm_x4()
40 float vacc1 = vx1 * vsixth + vhalf; in xnn_f32_hswish_ukernel__wasm_x4()
41 float vacc2 = vx2 * vsixth + vhalf; in xnn_f32_hswish_ukernel__wasm_x4()
42 float vacc3 = vx3 * vsixth + vhalf; in xnn_f32_hswish_ukernel__wasm_x4()
68 float vacc = vx * vsixth + vhalf; in xnn_f32_hswish_ukernel__wasm_x4()
Dneonfma-x8.c27 const float32x4_t vsixth = vld1q_dup_f32(&params->scalar.sixth); in xnn_f32_hswish_ukernel__neonfma_x8() local
36 float32x4_t vacc0123 = vfmaq_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__neonfma_x8()
37 float32x4_t vacc4567 = vfmaq_f32(vhalf, vx4567, vsixth); in xnn_f32_hswish_ukernel__neonfma_x8()
53 float32x4_t vacc0123 = vfmaq_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__neonfma_x8()
61 float32x4_t vacc0123 = vfmaq_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__neonfma_x8()
Dpsimd-x8.c27 const psimd_f32 vsixth = psimd_load_splat_f32(&params->scalar.sixth); in xnn_f32_hswish_ukernel__psimd_x8() local
37 psimd_f32 vacc0123 = psimd_qfma_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__psimd_x8()
38 psimd_f32 vacc4567 = psimd_qfma_f32(vhalf, vx4567, vsixth); in xnn_f32_hswish_ukernel__psimd_x8()
56 psimd_f32 vacc0123 = psimd_qfma_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__psimd_x8()
65 psimd_f32 vacc0123 = psimd_qfma_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__psimd_x8()
Dneon-x8.c27 const float32x4_t vsixth = vld1q_dup_f32(&params->scalar.sixth); in xnn_f32_hswish_ukernel__neon_x8() local
36 float32x4_t vacc0123 = vmlaq_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__neon_x8()
37 float32x4_t vacc4567 = vmlaq_f32(vhalf, vx4567, vsixth); in xnn_f32_hswish_ukernel__neon_x8()
53 float32x4_t vacc0123 = vmlaq_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__neon_x8()
61 float32x4_t vacc0123 = vmlaq_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__neon_x8()
Dsse-x8.c27 const __m128 vsixth = _mm_load_ps(params->sse.sixth); in xnn_f32_hswish_ukernel__sse_x8() local
37 __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth); in xnn_f32_hswish_ukernel__sse_x8()
38 __m128 vacc4567 = _mm_mul_ps(vx4567, vsixth); in xnn_f32_hswish_ukernel__sse_x8()
59 __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth); in xnn_f32_hswish_ukernel__sse_x8()
69 __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth); in xnn_f32_hswish_ukernel__sse_x8()
Davx512f-x32.c28 const __m512 vsixth = _mm512_broadcast_f32x4(_mm_load_ps(params->sse.sixth)); in xnn_f32_hswish_ukernel__avx512f_x32() local
38 __m512 vacc0123456789ABCDEF = _mm512_fmadd_ps(vx0123456789ABCDEF, vsixth, vhalf); in xnn_f32_hswish_ukernel__avx512f_x32()
39 __m512 vaccGHIJKLMNOPQRSTUV = _mm512_fmadd_ps(vxGHIJKLMNOPQRSTUV, vsixth, vhalf); in xnn_f32_hswish_ukernel__avx512f_x32()
57 __m512 vacc = _mm512_fmadd_ps(vx, vsixth, vhalf); in xnn_f32_hswish_ukernel__avx512f_x32()
72 __m512 vacc = _mm512_fmadd_ps(vx, vsixth, vhalf); in xnn_f32_hswish_ukernel__avx512f_x32()
Dwasm-x2.c26 const float vsixth = params->scalar.sixth; in xnn_f32_hswish_ukernel__wasm_x2() local
37 float vacc0 = vx0 * vsixth + vhalf; in xnn_f32_hswish_ukernel__wasm_x2()
38 float vacc1 = vx1 * vsixth + vhalf; in xnn_f32_hswish_ukernel__wasm_x2()
55 float vacc = vx * vsixth + vhalf; in xnn_f32_hswish_ukernel__wasm_x2()
Dscalar-x2.c26 const float vsixth = params->scalar.sixth; in xnn_f32_hswish_ukernel__scalar_x2() local
37 float vacc0 = vx0 * vsixth + vhalf; in xnn_f32_hswish_ukernel__scalar_x2()
38 float vacc1 = vx1 * vsixth + vhalf; in xnn_f32_hswish_ukernel__scalar_x2()
55 float vacc = vx * vsixth + vhalf; in xnn_f32_hswish_ukernel__scalar_x2()
Dfma3-x16.c29 const __m256 vsixth = _mm256_broadcast_ps((const __m128*) params->sse.sixth); in xnn_f32_hswish_ukernel__fma3_x16() local
39 __m256 vacc01234567 = _mm256_fmadd_ps(vx01234567, vsixth, vhalf); in xnn_f32_hswish_ukernel__fma3_x16()
40 __m256 vacc89ABCDEF = _mm256_fmadd_ps(vx89ABCDEF, vsixth, vhalf); in xnn_f32_hswish_ukernel__fma3_x16()
58 __m256 vacc = _mm256_fmadd_ps(vx, vsixth, vhalf); in xnn_f32_hswish_ukernel__fma3_x16()
71 __m256 vacc = _mm256_fmadd_ps(vx, vsixth, vhalf); in xnn_f32_hswish_ukernel__fma3_x16()
Davx-x16.c29 const __m256 vsixth = _mm256_broadcast_ps((const __m128*) params->sse.sixth); in xnn_f32_hswish_ukernel__avx_x16() local
39 __m256 vacc01234567 = _mm256_mul_ps(vx01234567, vsixth); in xnn_f32_hswish_ukernel__avx_x16()
40 __m256 vacc89ABCDEF = _mm256_mul_ps(vx89ABCDEF, vsixth); in xnn_f32_hswish_ukernel__avx_x16()
61 __m256 vacc = _mm256_mul_ps(vx, vsixth); in xnn_f32_hswish_ukernel__avx_x16()
75 __m256 vacc = _mm256_mul_ps(vx, vsixth); in xnn_f32_hswish_ukernel__avx_x16()
Dpsimd-x4.c27 const psimd_f32 vsixth = psimd_load_splat_f32(&params->scalar.sixth); in xnn_f32_hswish_ukernel__psimd_x4() local
36 psimd_f32 vacc0123 = psimd_qfma_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__psimd_x4()
50 psimd_f32 vacc0123 = psimd_qfma_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__psimd_x4()
59 psimd_f32 vacc0123 = psimd_qfma_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__psimd_x4()
Davx512f-x16.c28 const __m512 vsixth = _mm512_broadcast_f32x4(_mm_load_ps(params->sse.sixth)); in xnn_f32_hswish_ukernel__avx512f_x16() local
37 __m512 vacc0123456789ABCDEF = _mm512_fmadd_ps(vx0123456789ABCDEF, vsixth, vhalf); in xnn_f32_hswish_ukernel__avx512f_x16()
51 __m512 vacc = _mm512_fmadd_ps(vx, vsixth, vhalf); in xnn_f32_hswish_ukernel__avx512f_x16()
66 __m512 vacc = _mm512_fmadd_ps(vx, vsixth, vhalf); in xnn_f32_hswish_ukernel__avx512f_x16()
Dneon-x4.c27 const float32x4_t vsixth = vld1q_dup_f32(&params->scalar.sixth); in xnn_f32_hswish_ukernel__neon_x4() local
35 float32x4_t vacc0123 = vmlaq_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__neon_x4()
47 float32x4_t vacc0123 = vmlaq_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__neon_x4()
55 float32x4_t vacc0123 = vmlaq_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__neon_x4()
Dneonfma-x4.c27 const float32x4_t vsixth = vld1q_dup_f32(&params->scalar.sixth); in xnn_f32_hswish_ukernel__neonfma_x4() local
35 float32x4_t vacc0123 = vfmaq_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__neonfma_x4()
47 float32x4_t vacc0123 = vfmaq_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__neonfma_x4()
55 float32x4_t vacc0123 = vfmaq_f32(vhalf, vx0123, vsixth); in xnn_f32_hswish_ukernel__neonfma_x4()
Dsse-x4.c27 const __m128 vsixth = _mm_load_ps(params->sse.sixth); in xnn_f32_hswish_ukernel__sse_x4() local
36 __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth); in xnn_f32_hswish_ukernel__sse_x4()
52 __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth); in xnn_f32_hswish_ukernel__sse_x4()
62 __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth); in xnn_f32_hswish_ukernel__sse_x4()
Dfma3-x8.c29 const __m256 vsixth = _mm256_broadcast_ps((const __m128*) params->sse.sixth); in xnn_f32_hswish_ukernel__fma3_x8() local
38 __m256 vacc01234567 = _mm256_fmadd_ps(vx01234567, vsixth, vhalf); in xnn_f32_hswish_ukernel__fma3_x8()
52 __m256 vacc = _mm256_fmadd_ps(vx, vsixth, vhalf); in xnn_f32_hswish_ukernel__fma3_x8()
65 __m256 vacc = _mm256_fmadd_ps(vx, vsixth, vhalf); in xnn_f32_hswish_ukernel__fma3_x8()
Davx-x8.c29 const __m256 vsixth = _mm256_broadcast_ps((const __m128*) params->sse.sixth); in xnn_f32_hswish_ukernel__avx_x8() local
38 __m256 vacc01234567 = _mm256_mul_ps(vx01234567, vsixth); in xnn_f32_hswish_ukernel__avx_x8()
54 __m256 vacc = _mm256_mul_ps(vx, vsixth); in xnn_f32_hswish_ukernel__avx_x8()
68 __m256 vacc = _mm256_mul_ps(vx, vsixth); in xnn_f32_hswish_ukernel__avx_x8()
Dwasm-x1.c26 const float vsixth = params->scalar.sixth; in xnn_f32_hswish_ukernel__wasm_x1() local
34 float vacc = vx * vsixth + vhalf; in xnn_f32_hswish_ukernel__wasm_x1()
/external/XNNPACK/src/f32-hswish/
Dneon.c.in26 const float32x4_t vsixth = vld1q_dup_f32(&params->scalar.sixth);
37 float32x4_t vacc${ABC[N:N+4]} = vfmaq_f32(vhalf, vx${ABC[N:N+4]}, vsixth);
39 float32x4_t vacc${ABC[N:N+4]} = vmlaq_f32(vhalf, vx${ABC[N:N+4]}, vsixth);
57 float32x4_t vacc0123 = vfmaq_f32(vhalf, vx0123, vsixth);
59 float32x4_t vacc0123 = vmlaq_f32(vhalf, vx0123, vsixth);
68 float32x4_t vacc0123 = vfmaq_f32(vhalf, vx0123, vsixth);
70 float32x4_t vacc0123 = vmlaq_f32(vhalf, vx0123, vsixth);
Davx.c.in29 const __m256 vsixth = _mm256_broadcast_ps((const __m128*) params->sse.sixth); variable
42 __m256 vacc${ABC[N:N+8]} = _mm256_fmadd_ps(vx${ABC[N:N+8]}, vsixth, vhalf);
45 __m256 vacc${ABC[N:N+8]} = _mm256_mul_ps(vx${ABC[N:N+8]}, vsixth);
69 __m256 vacc = _mm256_fmadd_ps(vx, vsixth, vhalf);
71 __m256 vacc = _mm256_mul_ps(vx, vsixth);
86 __m256 vacc = _mm256_fmadd_ps(vx, vsixth, vhalf);
88 __m256 vacc = _mm256_mul_ps(vx, vsixth);
Dscalar.c.in26 const float vsixth = params->scalar.sixth;
39 float vacc${ABC[N]} = vx${ABC[N]} * vsixth + vhalf;
58 float vacc = vx * vsixth + vhalf;
67 float vacc = vx * vsixth + vhalf;
76 float vacc = vx * vsixth + vhalf;
Dsse.c.in26 const __m128 vsixth = _mm_load_ps(params->sse.sixth); variable
38 __m128 vacc${ABC[N:N+4]} = _mm_mul_ps(vx${ABC[N:N+4]}, vsixth);
61 __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth);
71 __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth);
Dpsimd.c.in26 const psimd_f32 vsixth = psimd_load_splat_f32(&params->scalar.sixth); variable
38 psimd_f32 vacc${ABC[N:N+4]} = psimd_qfma_f32(vhalf, vx${ABC[N:N+4]}, vsixth);
58 psimd_f32 vacc0123 = psimd_qfma_f32(vhalf, vx0123, vsixth);
67 psimd_f32 vacc0123 = psimd_qfma_f32(vhalf, vx0123, vsixth);
Davx512f.c.in27 const __m512 vsixth = _mm512_broadcast_f32x4(_mm_load_ps(params->sse.sixth)); variable
39 __m512 vacc${ABC[N:N+16]} = _mm512_fmadd_ps(vx${ABC[N:N+16]}, vsixth, vhalf);
59 __m512 vacc = _mm512_fmadd_ps(vx, vsixth, vhalf);
74 __m512 vacc = _mm512_fmadd_ps(vx, vsixth, vhalf);

12