/external/XNNPACK/src/f32-hswish/gen/
scalar-x4.c  (references in xnn_f32_hswish_ukernel__scalar_x4()):
    26  const float vsixth = params->scalar.sixth;  (local)
    39  float vacc0 = vx0 * vsixth + vhalf;
    40  float vacc1 = vx1 * vsixth + vhalf;
    41  float vacc2 = vx2 * vsixth + vhalf;
    42  float vacc3 = vx3 * vsixth + vhalf;
    68  float vacc = vx * vsixth + vhalf;
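For context, the multiply-add lines above compute the x/6 + 1/2 term of h-swish; the clamp and the final multiply by x do not reference vsixth, so they do not appear in this cross-reference. A minimal scalar sketch, assuming the standard definition hswish(x) = x * max(min(x/6 + 1/2, 1), 0); the helper name and inline constants are illustrative, not taken from the listed files:

#include <math.h>

/* Hypothetical standalone helper; the real kernels read vsixth and vhalf
 * from a params struct and process batches of elements. */
static inline float hswish_scalar(float vx) {
  const float vsixth = 0x1.555556p-3f;  /* approximately 1/6 */
  const float vhalf  = 0.5f;
  float vacc = vx * vsixth + vhalf;     /* the multiply-add step shown in the listing */
  vacc = fmaxf(vacc, 0.0f);             /* clamp below at 0 */
  vacc = fminf(vacc, 1.0f);             /* clamp above at 1 */
  return vx * vacc;                     /* final multiply by the input */
}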
wasm-x4.c  (references in xnn_f32_hswish_ukernel__wasm_x4()):
    26  const float vsixth = params->scalar.sixth;  (local)
    39  float vacc0 = vx0 * vsixth + vhalf;
    40  float vacc1 = vx1 * vsixth + vhalf;
    41  float vacc2 = vx2 * vsixth + vhalf;
    42  float vacc3 = vx3 * vsixth + vhalf;
    68  float vacc = vx * vsixth + vhalf;
neonfma-x8.c  (references in xnn_f32_hswish_ukernel__neonfma_x8()):
    27  const float32x4_t vsixth = vld1q_dup_f32(&params->scalar.sixth);  (local)
    36  float32x4_t vacc0123 = vfmaq_f32(vhalf, vx0123, vsixth);
    37  float32x4_t vacc4567 = vfmaq_f32(vhalf, vx4567, vsixth);
    53  float32x4_t vacc0123 = vfmaq_f32(vhalf, vx0123, vsixth);
    61  float32x4_t vacc0123 = vfmaq_f32(vhalf, vx0123, vsixth);
psimd-x8.c  (references in xnn_f32_hswish_ukernel__psimd_x8()):
    27  const psimd_f32 vsixth = psimd_load_splat_f32(&params->scalar.sixth);  (local)
    37  psimd_f32 vacc0123 = psimd_qfma_f32(vhalf, vx0123, vsixth);
    38  psimd_f32 vacc4567 = psimd_qfma_f32(vhalf, vx4567, vsixth);
    56  psimd_f32 vacc0123 = psimd_qfma_f32(vhalf, vx0123, vsixth);
    65  psimd_f32 vacc0123 = psimd_qfma_f32(vhalf, vx0123, vsixth);
neon-x8.c  (references in xnn_f32_hswish_ukernel__neon_x8()):
    27  const float32x4_t vsixth = vld1q_dup_f32(&params->scalar.sixth);  (local)
    36  float32x4_t vacc0123 = vmlaq_f32(vhalf, vx0123, vsixth);
    37  float32x4_t vacc4567 = vmlaq_f32(vhalf, vx4567, vsixth);
    53  float32x4_t vacc0123 = vmlaq_f32(vhalf, vx0123, vsixth);
    61  float32x4_t vacc0123 = vmlaq_f32(vhalf, vx0123, vsixth);
sse-x8.c  (references in xnn_f32_hswish_ukernel__sse_x8()):
    27  const __m128 vsixth = _mm_load_ps(params->sse.sixth);  (local)
    37  __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth);
    38  __m128 vacc4567 = _mm_mul_ps(vx4567, vsixth);
    59  __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth);
    69  __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth);
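The SSE kernels have no fused multiply-add, so the lines that reference vsixth show only a plain _mm_mul_ps; the add of vhalf, the clamp, and the final multiply come afterwards and are not part of this cross-reference. A hedged single-vector sketch of that non-FMA pattern, with constants set inline as an assumption instead of being loaded from params->sse:

#include <xmmintrin.h>

/* Illustrative helper only; not the library's exact kernel code. */
static inline __m128 hswish_sse(__m128 vx) {
  const __m128 vsixth = _mm_set1_ps(0x1.555556p-3f);  /* approximately 1/6 */
  const __m128 vhalf  = _mm_set1_ps(0.5f);
  const __m128 vone   = _mm_set1_ps(1.0f);
  const __m128 vzero  = _mm_setzero_ps();
  __m128 vacc = _mm_mul_ps(vx, vsixth);   /* x * (1/6): the step shown above */
  vacc = _mm_add_ps(vacc, vhalf);         /* + 1/2 as a separate add */
  vacc = _mm_max_ps(vacc, vzero);         /* clamp below at 0 */
  vacc = _mm_min_ps(vacc, vone);          /* clamp above at 1 */
  return _mm_mul_ps(vx, vacc);            /* x * clamped value */
}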
avx512f-x32.c  (references in xnn_f32_hswish_ukernel__avx512f_x32()):
    28  const __m512 vsixth = _mm512_broadcast_f32x4(_mm_load_ps(params->sse.sixth));  (local)
    38  __m512 vacc0123456789ABCDEF = _mm512_fmadd_ps(vx0123456789ABCDEF, vsixth, vhalf);
    39  __m512 vaccGHIJKLMNOPQRSTUV = _mm512_fmadd_ps(vxGHIJKLMNOPQRSTUV, vsixth, vhalf);
    57  __m512 vacc = _mm512_fmadd_ps(vx, vsixth, vhalf);
    72  __m512 vacc = _mm512_fmadd_ps(vx, vsixth, vhalf);
wasm-x2.c  (references in xnn_f32_hswish_ukernel__wasm_x2()):
    26  const float vsixth = params->scalar.sixth;  (local)
    37  float vacc0 = vx0 * vsixth + vhalf;
    38  float vacc1 = vx1 * vsixth + vhalf;
    55  float vacc = vx * vsixth + vhalf;
scalar-x2.c  (references in xnn_f32_hswish_ukernel__scalar_x2()):
    26  const float vsixth = params->scalar.sixth;  (local)
    37  float vacc0 = vx0 * vsixth + vhalf;
    38  float vacc1 = vx1 * vsixth + vhalf;
    55  float vacc = vx * vsixth + vhalf;
fma3-x16.c  (references in xnn_f32_hswish_ukernel__fma3_x16()):
    29  const __m256 vsixth = _mm256_broadcast_ps((const __m128*) params->sse.sixth);  (local)
    39  __m256 vacc01234567 = _mm256_fmadd_ps(vx01234567, vsixth, vhalf);
    40  __m256 vacc89ABCDEF = _mm256_fmadd_ps(vx89ABCDEF, vsixth, vhalf);
    58  __m256 vacc = _mm256_fmadd_ps(vx, vsixth, vhalf);
    71  __m256 vacc = _mm256_fmadd_ps(vx, vsixth, vhalf);
avx-x16.c  (references in xnn_f32_hswish_ukernel__avx_x16()):
    29  const __m256 vsixth = _mm256_broadcast_ps((const __m128*) params->sse.sixth);  (local)
    39  __m256 vacc01234567 = _mm256_mul_ps(vx01234567, vsixth);
    40  __m256 vacc89ABCDEF = _mm256_mul_ps(vx89ABCDEF, vsixth);
    61  __m256 vacc = _mm256_mul_ps(vx, vsixth);
    75  __m256 vacc = _mm256_mul_ps(vx, vsixth);
psimd-x4.c  (references in xnn_f32_hswish_ukernel__psimd_x4()):
    27  const psimd_f32 vsixth = psimd_load_splat_f32(&params->scalar.sixth);  (local)
    36  psimd_f32 vacc0123 = psimd_qfma_f32(vhalf, vx0123, vsixth);
    50  psimd_f32 vacc0123 = psimd_qfma_f32(vhalf, vx0123, vsixth);
    59  psimd_f32 vacc0123 = psimd_qfma_f32(vhalf, vx0123, vsixth);
avx512f-x16.c  (references in xnn_f32_hswish_ukernel__avx512f_x16()):
    28  const __m512 vsixth = _mm512_broadcast_f32x4(_mm_load_ps(params->sse.sixth));  (local)
    37  __m512 vacc0123456789ABCDEF = _mm512_fmadd_ps(vx0123456789ABCDEF, vsixth, vhalf);
    51  __m512 vacc = _mm512_fmadd_ps(vx, vsixth, vhalf);
    66  __m512 vacc = _mm512_fmadd_ps(vx, vsixth, vhalf);
neon-x4.c  (references in xnn_f32_hswish_ukernel__neon_x4()):
    27  const float32x4_t vsixth = vld1q_dup_f32(&params->scalar.sixth);  (local)
    35  float32x4_t vacc0123 = vmlaq_f32(vhalf, vx0123, vsixth);
    47  float32x4_t vacc0123 = vmlaq_f32(vhalf, vx0123, vsixth);
    55  float32x4_t vacc0123 = vmlaq_f32(vhalf, vx0123, vsixth);
neonfma-x4.c  (references in xnn_f32_hswish_ukernel__neonfma_x4()):
    27  const float32x4_t vsixth = vld1q_dup_f32(&params->scalar.sixth);  (local)
    35  float32x4_t vacc0123 = vfmaq_f32(vhalf, vx0123, vsixth);
    47  float32x4_t vacc0123 = vfmaq_f32(vhalf, vx0123, vsixth);
    55  float32x4_t vacc0123 = vfmaq_f32(vhalf, vx0123, vsixth);
sse-x4.c  (references in xnn_f32_hswish_ukernel__sse_x4()):
    27  const __m128 vsixth = _mm_load_ps(params->sse.sixth);  (local)
    36  __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth);
    52  __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth);
    62  __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth);
fma3-x8.c  (references in xnn_f32_hswish_ukernel__fma3_x8()):
    29  const __m256 vsixth = _mm256_broadcast_ps((const __m128*) params->sse.sixth);  (local)
    38  __m256 vacc01234567 = _mm256_fmadd_ps(vx01234567, vsixth, vhalf);
    52  __m256 vacc = _mm256_fmadd_ps(vx, vsixth, vhalf);
    65  __m256 vacc = _mm256_fmadd_ps(vx, vsixth, vhalf);
avx-x8.c  (references in xnn_f32_hswish_ukernel__avx_x8()):
    29  const __m256 vsixth = _mm256_broadcast_ps((const __m128*) params->sse.sixth);  (local)
    38  __m256 vacc01234567 = _mm256_mul_ps(vx01234567, vsixth);
    54  __m256 vacc = _mm256_mul_ps(vx, vsixth);
    68  __m256 vacc = _mm256_mul_ps(vx, vsixth);
wasm-x1.c  (references in xnn_f32_hswish_ukernel__wasm_x1()):
    26  const float vsixth = params->scalar.sixth;  (local)
    34  float vacc = vx * vsixth + vhalf;
/external/XNNPACK/src/f32-hswish/
neon.c.in:
    26  const float32x4_t vsixth = vld1q_dup_f32(&params->scalar.sixth);
    37  float32x4_t vacc${ABC[N:N+4]} = vfmaq_f32(vhalf, vx${ABC[N:N+4]}, vsixth);
    39  float32x4_t vacc${ABC[N:N+4]} = vmlaq_f32(vhalf, vx${ABC[N:N+4]}, vsixth);
    57  float32x4_t vacc0123 = vfmaq_f32(vhalf, vx0123, vsixth);
    59  float32x4_t vacc0123 = vmlaq_f32(vhalf, vx0123, vsixth);
    68  float32x4_t vacc0123 = vfmaq_f32(vhalf, vx0123, vsixth);
    70  float32x4_t vacc0123 = vmlaq_f32(vhalf, vx0123, vsixth);
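This template emits either vfmaq_f32 (fused multiply-add, seen in the generated neonfma-* kernels above) or vmlaq_f32 (non-fused multiply-accumulate, seen in the plain neon-* kernels) for the same x/6 + 1/2 step. A hedged single-vector illustration of the two paths; the preprocessor guard and inline constants are assumptions for the sketch, not the template's actual selection mechanism:

#include <arm_neon.h>

/* Illustrative helper only; the real kernels load constants from params. */
static inline float32x4_t hswish_neon(float32x4_t vx) {
  const float32x4_t vsixth = vdupq_n_f32(0x1.555556p-3f);  /* approximately 1/6 */
  const float32x4_t vhalf  = vdupq_n_f32(0.5f);
  const float32x4_t vone   = vdupq_n_f32(1.0f);
#if defined(__aarch64__) || defined(__ARM_FEATURE_FMA)
  float32x4_t vacc = vfmaq_f32(vhalf, vx, vsixth);  /* fused: vhalf + vx * vsixth */
#else
  float32x4_t vacc = vmlaq_f32(vhalf, vx, vsixth);  /* non-fused multiply-accumulate */
#endif
  vacc = vmaxq_f32(vacc, vdupq_n_f32(0.0f));        /* clamp below at 0 */
  vacc = vminq_f32(vacc, vone);                     /* clamp above at 1 */
  return vmulq_f32(vx, vacc);                       /* x * clamped value */
}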
avx.c.in:
    29  const __m256 vsixth = _mm256_broadcast_ps((const __m128*) params->sse.sixth);  (variable)
    42  __m256 vacc${ABC[N:N+8]} = _mm256_fmadd_ps(vx${ABC[N:N+8]}, vsixth, vhalf);
    45  __m256 vacc${ABC[N:N+8]} = _mm256_mul_ps(vx${ABC[N:N+8]}, vsixth);
    69  __m256 vacc = _mm256_fmadd_ps(vx, vsixth, vhalf);
    71  __m256 vacc = _mm256_mul_ps(vx, vsixth);
    86  __m256 vacc = _mm256_fmadd_ps(vx, vsixth, vhalf);
    88  __m256 vacc = _mm256_mul_ps(vx, vsixth);
scalar.c.in:
    26  const float vsixth = params->scalar.sixth;
    39  float vacc${ABC[N]} = vx${ABC[N]} * vsixth + vhalf;
    58  float vacc = vx * vsixth + vhalf;
    67  float vacc = vx * vsixth + vhalf;
    76  float vacc = vx * vsixth + vhalf;
sse.c.in:
    26  const __m128 vsixth = _mm_load_ps(params->sse.sixth);  (variable)
    38  __m128 vacc${ABC[N:N+4]} = _mm_mul_ps(vx${ABC[N:N+4]}, vsixth);
    61  __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth);
    71  __m128 vacc0123 = _mm_mul_ps(vx0123, vsixth);
psimd.c.in:
    26  const psimd_f32 vsixth = psimd_load_splat_f32(&params->scalar.sixth);  (variable)
    38  psimd_f32 vacc${ABC[N:N+4]} = psimd_qfma_f32(vhalf, vx${ABC[N:N+4]}, vsixth);
    58  psimd_f32 vacc0123 = psimd_qfma_f32(vhalf, vx0123, vsixth);
    67  psimd_f32 vacc0123 = psimd_qfma_f32(vhalf, vx0123, vsixth);
avx512f.c.in:
    27  const __m512 vsixth = _mm512_broadcast_f32x4(_mm_load_ps(params->sse.sixth));  (variable)
    39  __m512 vacc${ABC[N:N+16]} = _mm512_fmadd_ps(vx${ABC[N:N+16]}, vsixth, vhalf);
    59  __m512 vacc = _mm512_fmadd_ps(vx, vsixth, vhalf);
    74  __m512 vacc = _mm512_fmadd_ps(vx, vsixth, vhalf);