/external/XNNPACK/src/f32-vlrelu/gen/ |
D | vlrelu-scalar-x4.c |
    25  const float vslope = params->scalar.slope;  in xnn_f32_vlrelu_ukernel__scalar_x4() local
    34  float vacc0 = vx0 * vslope;  in xnn_f32_vlrelu_ukernel__scalar_x4()
    35  float vacc1 = vx1 * vslope;  in xnn_f32_vlrelu_ukernel__scalar_x4()
    36  float vacc2 = vx2 * vslope;  in xnn_f32_vlrelu_ukernel__scalar_x4()
    37  float vacc3 = vx3 * vslope;  in xnn_f32_vlrelu_ukernel__scalar_x4()
    53  float vacc = vx * vslope;  in xnn_f32_vlrelu_ukernel__scalar_x4()
|
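Note: the products above are only the negative-branch value vx * vslope; the generated scalar kernels then keep the original input for non-negative elements. A minimal standalone sketch of that pattern (the helper name and loop below are illustrative, not the generated source):

    #include <stddef.h>

    // Scalar LeakyReLU over n floats: y = x for x >= 0, y = x * slope otherwise.
    // Mirrors the pattern in the listing: compute x * slope unconditionally,
    // then select between it and x based on the sign of x.
    static void leaky_relu_scalar(size_t n, const float* x, float* y, float slope) {
      for (size_t i = 0; i < n; i++) {
        const float vx = x[i];
        const float vacc = vx * slope;   // negative-branch value
        y[i] = (vx < 0.0f) ? vacc : vx;  // keep vx for non-negative inputs
      }
    }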
D | vlrelu-wasm-x4.c |
    25  const float vslope = params->scalar.slope;  in xnn_f32_vlrelu_ukernel__wasm_x4() local
    40  float vacc0 = vnegx0 * vslope;  in xnn_f32_vlrelu_ukernel__wasm_x4()
    42  float vacc1 = vnegx1 * vslope;  in xnn_f32_vlrelu_ukernel__wasm_x4()
    44  float vacc2 = vnegx2 * vslope;  in xnn_f32_vlrelu_ukernel__wasm_x4()
    46  float vacc3 = vnegx3 * vslope;  in xnn_f32_vlrelu_ukernel__wasm_x4()
    64  float vacc = vnegx * vslope;  in xnn_f32_vlrelu_ukernel__wasm_x4()
|
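Note: the wasm variant multiplies vnegx rather than vx, i.e. the negative part of the input is isolated first so no branch is needed. A hedged sketch of that split-and-recombine idea, using standard C math functions in place of the WAsm-specific builtins (helper name illustrative):

    #include <math.h>
    #include <stddef.h>

    // Branch-free scalar LeakyReLU: split x into negative and positive parts,
    // scale only the negative part, then recombine.
    static void leaky_relu_split(size_t n, const float* x, float* y, float slope) {
      for (size_t i = 0; i < n; i++) {
        const float vx = x[i];
        const float vnegx = fminf(vx, 0.0f);  // negative part (0 if vx >= 0)
        const float vposx = fmaxf(vx, 0.0f);  // positive part (0 if vx <= 0)
        y[i] = vposx + vnegx * slope;
      }
    }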
D | vlrelu-sse41-x8.c |
    27  const __m128 vslope = _mm_load_ps(params->sse.slope);  in xnn_f32_vlrelu_ukernel__sse41_x8() local
    33  __m128 vacc0123 = _mm_mul_ps(vx0123, vslope);  in xnn_f32_vlrelu_ukernel__sse41_x8()
    34  __m128 vacc4567 = _mm_mul_ps(vx4567, vslope);  in xnn_f32_vlrelu_ukernel__sse41_x8()
    47  __m128 vacc = _mm_mul_ps(vx, vslope);  in xnn_f32_vlrelu_ukernel__sse41_x8()
    56  __m128 vacc = _mm_mul_ps(vx, vslope);  in xnn_f32_vlrelu_ukernel__sse41_x8()
|
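Note: the SSE4.1 kernels multiply first and then select per lane. A sketch of that idea using _mm_blendv_ps, which blends on the sign bit of its mask operand (the helper below is illustrative, not the generated source):

    #include <smmintrin.h>  // SSE4.1

    // Process 4 floats: y = x >= 0 ? x : x * slope.
    static __m128 leaky_relu_sse41(__m128 vx, __m128 vslope) {
      const __m128 vacc = _mm_mul_ps(vx, vslope);
      // blendv picks the second operand where the mask's sign bit is set,
      // i.e. vacc for negative lanes of vx, vx otherwise.
      return _mm_blendv_ps(vx, vacc, vx);
    }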
D | vlrelu-avx512f-x32.c |
    28  const __m512 vslope = _mm512_broadcast_f32x4(_mm_load_ps(params->sse.slope));  in xnn_f32_vlrelu_ukernel__avx512f_x32() local
    39  …EF = _mm512_mask_mul_ps(vacc0123456789ABCDEF, vsign0123456789ABCDEF, vacc0123456789ABCDEF, vslope);  in xnn_f32_vlrelu_ukernel__avx512f_x32()
    40  …UV = _mm512_mask_mul_ps(vaccGHIJKLMNOPQRSTUV, vsignGHIJKLMNOPQRSTUV, vaccGHIJKLMNOPQRSTUV, vslope);  in xnn_f32_vlrelu_ukernel__avx512f_x32()
    50  vacc = _mm512_mask_mul_ps(vacc, vsign, vacc, vslope);  in xnn_f32_vlrelu_ukernel__avx512f_x32()
    63  vacc = _mm512_mask_mul_ps(vacc, vsign, vacc, vslope);  in xnn_f32_vlrelu_ukernel__avx512f_x32()
|
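Note: _mm512_mask_mul_ps(src, k, a, b) writes a * b only into the lanes selected by mask k and copies src elsewhere, so one masked multiply rewrites just the negative lanes. A hedged sketch of that idea (the mask construction below is an assumption, not necessarily how the generated file builds vsign):

    #include <immintrin.h>  // AVX-512F

    // Process 16 floats: scale only the lanes where vx < 0, keep the rest as-is.
    static __m512 leaky_relu_avx512f(__m512 vx, __m512 vslope) {
      const __mmask16 vsign = _mm512_cmp_ps_mask(vx, _mm512_setzero_ps(), _CMP_LT_OQ);
      return _mm512_mask_mul_ps(vx, vsign, vx, vslope);
    }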
D | vlrelu-scalar-x2.c |
    25  const float vslope = params->scalar.slope;  in xnn_f32_vlrelu_ukernel__scalar_x2() local
    32  float vacc0 = vx0 * vslope;  in xnn_f32_vlrelu_ukernel__scalar_x2()
    33  float vacc1 = vx1 * vslope;  in xnn_f32_vlrelu_ukernel__scalar_x2()
    44  float vacc = vx * vslope;  in xnn_f32_vlrelu_ukernel__scalar_x2()
|
D | vlrelu-wasmsimd-minmax-x8.c |
    27  const v128_t vslope = wasm_v32x4_load_splat(&params->scalar.slope);  in xnn_f32_vlrelu_ukernel__wasmsimd_minmax_x8() local
    39  vacc0123 = wasm_f32x4_add(vacc0123, wasm_f32x4_mul(vx0123, vslope));  in xnn_f32_vlrelu_ukernel__wasmsimd_minmax_x8()
    40  vacc4567 = wasm_f32x4_add(vacc4567, wasm_f32x4_mul(vx4567, vslope));  in xnn_f32_vlrelu_ukernel__wasmsimd_minmax_x8()
    51  vacc = wasm_f32x4_add(vacc, wasm_f32x4_mul(vx, vslope));  in xnn_f32_vlrelu_ukernel__wasmsimd_minmax_x8()
    59  vacc = wasm_f32x4_add(vacc, wasm_f32x4_mul(vx, vslope));  in xnn_f32_vlrelu_ukernel__wasmsimd_minmax_x8()
|
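Note: in the minmax flavor the add above folds the scaled term into an accumulator, consistent with a split-and-recombine formulation. A hedged sketch with WAsm SIMD intrinsics (an assumption about the structure, not the generated source):

    #include <wasm_simd128.h>

    // Process 4 floats: y = max(x, 0) + min(x, 0) * slope.
    static v128_t leaky_relu_wasmsimd_minmax(v128_t vx, v128_t vslope) {
      const v128_t vzero = wasm_f32x4_splat(0.0f);
      const v128_t vpos = wasm_f32x4_max(vx, vzero);  // positive part
      const v128_t vneg = wasm_f32x4_min(vx, vzero);  // negative part
      return wasm_f32x4_add(vpos, wasm_f32x4_mul(vneg, vslope));
    }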
D | vlrelu-sse-x8.c |
    27  const __m128 vslope = _mm_load_ps(params->sse.slope);  in xnn_f32_vlrelu_ukernel__sse_x8() local
    39  vacc0123 = _mm_add_ps(vacc0123, _mm_mul_ps(vx0123, vslope));  in xnn_f32_vlrelu_ukernel__sse_x8()
    40  vacc4567 = _mm_add_ps(vacc4567, _mm_mul_ps(vx4567, vslope));  in xnn_f32_vlrelu_ukernel__sse_x8()
    52  vacc = _mm_add_ps(vacc, _mm_mul_ps(vx, vslope));  in xnn_f32_vlrelu_ukernel__sse_x8()
    62  vacc = _mm_add_ps(vacc, _mm_mul_ps(vx, vslope));  in xnn_f32_vlrelu_ukernel__sse_x8()
|
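Note: plain SSE has no blend instruction, and the add-of-a-scaled-term shape above suggests the same min/max split as the wasmsimd minmax flavor. A hedged sketch (an assumption about the technique, not the generated source):

    #include <xmmintrin.h>  // SSE

    // Process 4 floats: y = max(x, 0) + min(x, 0) * slope, with no blend needed.
    static __m128 leaky_relu_sse(__m128 vx, __m128 vslope) {
      const __m128 vzero = _mm_setzero_ps();
      const __m128 vpos = _mm_max_ps(vzero, vx);  // positive part
      const __m128 vneg = _mm_min_ps(vzero, vx);  // negative part
      return _mm_add_ps(vpos, _mm_mul_ps(vneg, vslope));
    }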
D | vlrelu-avx-x16.c |
    29  const __m256 vslope = _mm256_broadcast_ps((const __m128*) params->sse.slope);  in xnn_f32_vlrelu_ukernel__avx_x16() local
    35  __m256 vacc01234567 = _mm256_mul_ps(vx01234567, vslope);  in xnn_f32_vlrelu_ukernel__avx_x16()
    36  __m256 vacc89ABCDEF = _mm256_mul_ps(vx89ABCDEF, vslope);  in xnn_f32_vlrelu_ukernel__avx_x16()
    48  __m256 vacc = _mm256_mul_ps(vx, vslope);  in xnn_f32_vlrelu_ukernel__avx_x16()
    59  __m256 vacc = _mm256_mul_ps(vx, vslope);  in xnn_f32_vlrelu_ukernel__avx_x16()
|
D | vlrelu-wasmsimd-bitselect-x8.c |
    27  const v128_t vslope = wasm_v32x4_load_splat(&params->scalar.slope);  in xnn_f32_vlrelu_ukernel__wasmsimd_bitselect_x8() local
    34  v128_t vacc0123 = wasm_f32x4_mul(vx0123, vslope);  in xnn_f32_vlrelu_ukernel__wasmsimd_bitselect_x8()
    36  v128_t vacc4567 = wasm_f32x4_mul(vx4567, vslope);  in xnn_f32_vlrelu_ukernel__wasmsimd_bitselect_x8()
    49  v128_t vacc = wasm_f32x4_mul(vx, vslope);  in xnn_f32_vlrelu_ukernel__wasmsimd_bitselect_x8()
    57  v128_t vacc = wasm_f32x4_mul(vx, vslope);  in xnn_f32_vlrelu_ukernel__wasmsimd_bitselect_x8()
|
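Note: the bitselect flavor multiplies first and then merges per lane with a bit mask. A hedged sketch that derives the mask from the sign bit (the mask construction is an assumption):

    #include <wasm_simd128.h>

    // Process 4 floats: compute x * slope, then bit-select it for negative lanes only.
    static v128_t leaky_relu_wasmsimd_bitselect(v128_t vx, v128_t vslope) {
      const v128_t vacc = wasm_f32x4_mul(vx, vslope);
      // Arithmetic shift of the sign bit gives all-ones lanes for negative inputs.
      const v128_t vmask = wasm_i32x4_shr(vx, 31);
      // Take vacc where the mask bits are set, vx elsewhere.
      return wasm_v128_bitselect(vacc, vx, vmask);
    }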
D | vlrelu-neon-x8.c |
    27  const float32x4_t vslope = vld1q_dup_f32(&params->scalar.slope);  in xnn_f32_vlrelu_ukernel__neon_x8() local
    33  float32x4_t vacc0123 = vmulq_f32(vx0123, vslope);  in xnn_f32_vlrelu_ukernel__neon_x8()
    35  float32x4_t vacc4567 = vmulq_f32(vx4567, vslope);  in xnn_f32_vlrelu_ukernel__neon_x8()
    46  float32x4_t vacc = vmulq_f32(vx, vslope);  in xnn_f32_vlrelu_ukernel__neon_x8()
    53  float32x4_t vacc = vmulq_f32(vx, vslope);  in xnn_f32_vlrelu_ukernel__neon_x8()
|
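Note: on NEON the multiply is followed by a per-lane select. A hedged sketch using a compare plus vbslq_f32 (the mask construction is an assumption, not copied from the generated file):

    #include <arm_neon.h>

    // Process 4 floats: y = x >= 0 ? x : x * slope.
    static float32x4_t leaky_relu_neon(float32x4_t vx, float32x4_t vslope) {
      const float32x4_t vacc = vmulq_f32(vx, vslope);
      const uint32x4_t vmask = vcltq_f32(vx, vdupq_n_f32(0.0f));  // all-ones where vx < 0
      return vbslq_f32(vmask, vacc, vx);  // vacc for negative lanes, vx otherwise
    }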
D | vlrelu-sse2-x8.c |
    27  const __m128 vslope = _mm_load_ps(params->sse.slope);  in xnn_f32_vlrelu_ukernel__sse2_x8() local
    33  __m128 vacc0123 = _mm_mul_ps(vx0123, vslope);  in xnn_f32_vlrelu_ukernel__sse2_x8()
    35  __m128 vacc4567 = _mm_mul_ps(vx4567, vslope);  in xnn_f32_vlrelu_ukernel__sse2_x8()
    49  __m128 vacc = _mm_mul_ps(vx, vslope);  in xnn_f32_vlrelu_ukernel__sse2_x8()
    59  __m128 vacc = _mm_mul_ps(vx, vslope);  in xnn_f32_vlrelu_ukernel__sse2_x8()
|
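Note: SSE2 lacks blendv, so the per-lane select has to be emulated with and/andnot/or on a sign mask. A hedged sketch of that emulation (an assumption about the technique, not the generated source):

    #include <emmintrin.h>  // SSE2

    // Process 4 floats: y = x >= 0 ? x : x * slope, emulating a blend.
    static __m128 leaky_relu_sse2(__m128 vx, __m128 vslope) {
      const __m128 vacc = _mm_mul_ps(vx, vslope);
      // All-ones lanes where the sign bit of vx is set.
      const __m128 vmask = _mm_castsi128_ps(_mm_srai_epi32(_mm_castps_si128(vx), 31));
      // (vmask & vacc) | (~vmask & vx)
      return _mm_or_ps(_mm_and_ps(vmask, vacc), _mm_andnot_ps(vmask, vx));
    }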
D | vlrelu-wasm-x2.c |
    25  const float vslope = params->scalar.slope;  in xnn_f32_vlrelu_ukernel__wasm_x2() local
    36  float vacc0 = vnegx0 * vslope;  in xnn_f32_vlrelu_ukernel__wasm_x2()
    38  float vacc1 = vnegx1 * vslope;  in xnn_f32_vlrelu_ukernel__wasm_x2()
    51  float vacc = vnegx * vslope;  in xnn_f32_vlrelu_ukernel__wasm_x2()
|
D | vlrelu-sse41-x4.c |
    27  const __m128 vslope = _mm_load_ps(params->sse.slope);  in xnn_f32_vlrelu_ukernel__sse41_x4() local
    32  __m128 vacc0123 = _mm_mul_ps(vx0123, vslope);  in xnn_f32_vlrelu_ukernel__sse41_x4()
    42  __m128 vacc = _mm_mul_ps(vx, vslope);  in xnn_f32_vlrelu_ukernel__sse41_x4()
|
D | vlrelu-wasmsimd-minmax-x4.c |
    27  const v128_t vslope = wasm_v32x4_load_splat(&params->scalar.slope);  in xnn_f32_vlrelu_ukernel__wasmsimd_minmax_x4() local
    34  vacc = wasm_f32x4_add(vacc, wasm_f32x4_mul(vx, vslope));  in xnn_f32_vlrelu_ukernel__wasmsimd_minmax_x4()
    42  vacc = wasm_f32x4_add(vacc, wasm_f32x4_mul(vx, vslope));  in xnn_f32_vlrelu_ukernel__wasmsimd_minmax_x4()
|
D | vlrelu-avx512f-x16.c |
    28  const __m512 vslope = _mm512_broadcast_f32x4(_mm_load_ps(params->sse.slope));  in xnn_f32_vlrelu_ukernel__avx512f_x16() local
    37  …EF = _mm512_mask_mul_ps(vacc0123456789ABCDEF, vsign0123456789ABCDEF, vacc0123456789ABCDEF, vslope);  in xnn_f32_vlrelu_ukernel__avx512f_x16()
    51  vacc = _mm512_mask_mul_ps(vacc, vsign, vacc, vslope);  in xnn_f32_vlrelu_ukernel__avx512f_x16()
|
D | vlrelu-sse-x4.c |
    27  const __m128 vslope = _mm_load_ps(params->sse.slope);  in xnn_f32_vlrelu_ukernel__sse_x4() local
    36  vacc0123 = _mm_add_ps(vacc0123, _mm_mul_ps(vx0123, vslope));  in xnn_f32_vlrelu_ukernel__sse_x4()
    46  vacc = _mm_add_ps(vacc, _mm_mul_ps(vx, vslope));  in xnn_f32_vlrelu_ukernel__sse_x4()
|
D | vlrelu-wasmsimd-bitselect-x4.c |
    27  const v128_t vslope = wasm_v32x4_load_splat(&params->scalar.slope);  in xnn_f32_vlrelu_ukernel__wasmsimd_bitselect_x4() local
    32  v128_t vacc = wasm_f32x4_mul(vx, vslope);  in xnn_f32_vlrelu_ukernel__wasmsimd_bitselect_x4()
    40  v128_t vacc = wasm_f32x4_mul(vx, vslope);  in xnn_f32_vlrelu_ukernel__wasmsimd_bitselect_x4()
|
D | vlrelu-neon-x4.c |
    27  const float32x4_t vslope = vld1q_dup_f32(&params->scalar.slope);  in xnn_f32_vlrelu_ukernel__neon_x4() local
    32  float32x4_t vacc0123 = vmulq_f32(vx0123, vslope);  in xnn_f32_vlrelu_ukernel__neon_x4()
    41  float32x4_t vacc = vmulq_f32(vx, vslope);  in xnn_f32_vlrelu_ukernel__neon_x4()
|
D | vlrelu-sse2-x4.c |
    27  const __m128 vslope = _mm_load_ps(params->sse.slope);  in xnn_f32_vlrelu_ukernel__sse2_x4() local
    32  __m128 vacc0123 = _mm_mul_ps(vx0123, vslope);  in xnn_f32_vlrelu_ukernel__sse2_x4()
    43  __m128 vacc = _mm_mul_ps(vx, vslope);  in xnn_f32_vlrelu_ukernel__sse2_x4()
|
D | vlrelu-avx-x8.c |
    29  const __m256 vslope = _mm256_broadcast_ps((const __m128*) params->sse.slope);  in xnn_f32_vlrelu_ukernel__avx_x8() local
    34  __m256 vacc01234567 = _mm256_mul_ps(vx01234567, vslope);  in xnn_f32_vlrelu_ukernel__avx_x8()
    47  __m256 vacc = _mm256_mul_ps(vx, vslope);  in xnn_f32_vlrelu_ukernel__avx_x8()
|
/external/XNNPACK/src/f32-vlrelu/ |
D | scalar.c.in |
    23  const float vslope = params->scalar.slope;  variable
    32  float vacc${ABC[N]} = vx${ABC[N]} * vslope;
    45  float vacc = vx * vslope;
    52  float vacc = vx * vslope;
    59  float vacc = vx * vslope;
|
D | wasm.c.in |
    23  const float vslope = params->scalar.slope;  variable
    36  float vacc${ABC[N]} = vnegx${ABC[N]} * vslope;
    51  float vacc = vnegx * vslope;
    60  float vacc = vnegx * vslope;
    69  float vacc = vnegx * vslope;
|
D | sse.c.in |
    29  const __m128 vslope = _mm_load_ps(params->sse.slope);  variable
    48  __m128 vacc${ABC[N:N+4]} = _mm_mul_ps(vx${ABC[N:N+4]}, vslope);
    54  vacc${ABC[N:N+4]} = _mm_add_ps(vacc${ABC[N:N+4]}, _mm_mul_ps(vx${ABC[N:N+4]}, vslope));
    76  vacc = _mm_add_ps(vacc, _mm_mul_ps(vx, vslope));
    78  __m128 vacc = _mm_mul_ps(vx, vslope);
    94  vacc = _mm_add_ps(vacc, _mm_mul_ps(vx, vslope));
    98  __m128 vacc = _mm_mul_ps(vx, vslope);
|
D | neon.c.in |
    26  const float32x4_t vslope = vld1q_dup_f32(&params->scalar.slope);  variable
    33  float32x4_t vacc${ABC[N:N+4]} = vmulq_f32(vx${ABC[N:N+4]}, vslope);
    45  float32x4_t vacc = vmulq_f32(vx, vslope);
    52  float32x4_t vacc = vmulq_f32(vx, vslope);
|
D | wasmsimd-minmax.c.in |
    26  const v128_t vslope = wasm_v32x4_load_splat(&params->scalar.slope);  variable
    40  … vacc${ABC[N:N+4]} = wasm_f32x4_add(vacc${ABC[N:N+4]}, wasm_f32x4_mul(vx${ABC[N:N+4]}, vslope));
    52  vacc = wasm_f32x4_add(vacc, wasm_f32x4_mul(vx, vslope));
    60  vacc = wasm_f32x4_add(vacc, wasm_f32x4_mul(vx, vslope));
|