Home
last modified time | relevance | path

Searched refs: vi0x0123 (Results 1 – 25 of 247) sorted by relevance

Pages: 1 2 3 4 5 6 7 8 9 10

/external/XNNPACK/src/f32-prelu/gen/
Dwasmsimd-minmax-1x8.c46 v128_t vi0x0123 = wasm_v128_load(i0); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x8() local
50 v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x8()
51 vi0x0123 = wasm_i32x4_min(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x8()
55 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(vi0x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x8()
66 v128_t vi0x0123 = wasm_v128_load(i0); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x8() local
69 v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x8()
70 vi0x0123 = wasm_i32x4_min(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x8()
72 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(vi0x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x8()
81 v128_t vi0x0123 = wasm_v128_load(i0); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x8() local
84 v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x8()
[all …]
Dwasmsimd-minmax-1x16.c48 v128_t vi0x0123 = wasm_v128_load(i0); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16() local
54 v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16()
55 vi0x0123 = wasm_i32x4_min(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16()
63 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(vi0x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16()
78 v128_t vi0x0123 = wasm_v128_load(i0); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16() local
81 v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16()
82 vi0x0123 = wasm_i32x4_min(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16()
84 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(vi0x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16()
93 v128_t vi0x0123 = wasm_v128_load(i0); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16() local
96 v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16()
[all …]
Dwasmsimd-bitselect-1x8.c46 const v128_t vi0x0123 = wasm_v128_load(i0); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x8() local
50 v128_t vacc0x0123 = wasm_f32x4_mul(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x8()
51 const v128_t vmask0x0123 = wasm_i32x4_lt(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x8()
55 vacc0x0123 = wasm_v128_bitselect(vacc0x0123, vi0x0123, vmask0x0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x8()
66 const v128_t vi0x0123 = wasm_v128_load(i0); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x8() local
69 v128_t vacc0x0123 = wasm_f32x4_mul(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x8()
70 const v128_t vmask0x0123 = wasm_i32x4_lt(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x8()
72 vacc0x0123 = wasm_v128_bitselect(vacc0x0123, vi0x0123, vmask0x0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x8()
81 const v128_t vi0x0123 = wasm_v128_load(i0); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x8() local
84 v128_t vacc0x0123 = wasm_f32x4_mul(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x8()
[all …]
Dneon-1x8.c44 const float32x4_t vi0x0123 = vld1q_f32(i0); i0 += 4; in xnn_f32_prelu_ukernel__neon_1x8() local
47 float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__neon_1x8()
48 const uint32x4_t vm0x0123 = vcltq_s32(vreinterpretq_s32_f32(vi0x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_1x8()
52 vacc0x0123 = vbslq_f32(vm0x0123, vacc0x0123, vi0x0123); in xnn_f32_prelu_ukernel__neon_1x8()
61 const float32x4_t vi0x0123 = vld1q_f32(i0); in xnn_f32_prelu_ukernel__neon_1x8() local
64 float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__neon_1x8()
65 const uint32x4_t vm0x0123 = vcltq_s32(vreinterpretq_s32_f32(vi0x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_1x8()
67 vacc0x0123 = vbslq_f32(vm0x0123, vacc0x0123, vi0x0123); in xnn_f32_prelu_ukernel__neon_1x8()
74 const float32x4_t vi0x0123 = vld1q_f32(i0); in xnn_f32_prelu_ukernel__neon_1x8() local
77 float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__neon_1x8()
[all …]
Dwasmsimd-minmax-1x4.c45 v128_t vi0x0123 = wasm_v128_load(i0); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x4() local
48 v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x4()
49 vi0x0123 = wasm_i32x4_min(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x4()
51 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(vi0x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x4()
60 v128_t vi0x0123 = wasm_v128_load(i0); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x4() local
63 v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x4()
64 vi0x0123 = wasm_i32x4_min(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x4()
66 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(vi0x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x4()
Dwasmsimd-minmax-2x8.c52 v128_t vi0x0123 = wasm_v128_load(i0); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8() local
59 v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8()
60 vi0x0123 = wasm_i32x4_min(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8()
68 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(vi0x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8()
84 v128_t vi0x0123 = wasm_v128_load(i0); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8() local
89 v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8()
90 vi0x0123 = wasm_i32x4_min(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8()
94 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(vi0x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8()
106 v128_t vi0x0123 = wasm_v128_load(i0); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8() local
111 v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8()
[all …]
Dsse-2x8.c52 __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_prelu_ukernel__sse_2x8() local
59 __m128 vacc0x0123 = _mm_max_ps(_mm_setzero_ps(), vi0x0123); in xnn_f32_prelu_ukernel__sse_2x8()
60 vi0x0123 = _mm_min_ps(vi0x0123, vzero); in xnn_f32_prelu_ukernel__sse_2x8()
68 vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(vi0x0123, vw0123)); in xnn_f32_prelu_ukernel__sse_2x8()
84 __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_prelu_ukernel__sse_2x8() local
89 __m128 vacc0x0123 = _mm_max_ps(_mm_setzero_ps(), vi0x0123); in xnn_f32_prelu_ukernel__sse_2x8()
90 vi0x0123 = _mm_min_ps(vi0x0123, vzero); in xnn_f32_prelu_ukernel__sse_2x8()
94 vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(vi0x0123, vw0123)); in xnn_f32_prelu_ukernel__sse_2x8()
106 __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_prelu_ukernel__sse_2x8() local
111 __m128 vacc0x0123 = _mm_max_ps(_mm_setzero_ps(), vi0x0123); in xnn_f32_prelu_ukernel__sse_2x8()
[all …]
Dwasmsimd-bitselect-1x16.c48 const v128_t vi0x0123 = wasm_v128_load(i0); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x16() local
54 v128_t vacc0x0123 = wasm_f32x4_mul(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x16()
55 const v128_t vmask0x0123 = wasm_i32x4_lt(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x16()
63 vacc0x0123 = wasm_v128_bitselect(vacc0x0123, vi0x0123, vmask0x0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x16()
78 const v128_t vi0x0123 = wasm_v128_load(i0); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x16() local
81 v128_t vacc0x0123 = wasm_f32x4_mul(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x16()
82 const v128_t vmask0x0123 = wasm_i32x4_lt(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x16()
84 vacc0x0123 = wasm_v128_bitselect(vacc0x0123, vi0x0123, vmask0x0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x16()
93 const v128_t vi0x0123 = wasm_v128_load(i0); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x16() local
96 v128_t vacc0x0123 = wasm_f32x4_mul(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x16()
[all …]
Dneon-1x16.c46 const float32x4_t vi0x0123 = vld1q_f32(i0); i0 += 4; in xnn_f32_prelu_ukernel__neon_1x16() local
51 float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__neon_1x16()
52 const uint32x4_t vm0x0123 = vcltq_s32(vreinterpretq_s32_f32(vi0x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_1x16()
60 vacc0x0123 = vbslq_f32(vm0x0123, vacc0x0123, vi0x0123); in xnn_f32_prelu_ukernel__neon_1x16()
73 const float32x4_t vi0x0123 = vld1q_f32(i0); in xnn_f32_prelu_ukernel__neon_1x16() local
76 float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__neon_1x16()
77 const uint32x4_t vm0x0123 = vcltq_s32(vreinterpretq_s32_f32(vi0x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_1x16()
79 vacc0x0123 = vbslq_f32(vm0x0123, vacc0x0123, vi0x0123); in xnn_f32_prelu_ukernel__neon_1x16()
86 const float32x4_t vi0x0123 = vld1q_f32(i0); in xnn_f32_prelu_ukernel__neon_1x16() local
89 float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__neon_1x16()
[all …]
Dsse41-2x8.c51 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_prelu_ukernel__sse41_2x8() local
58 const __m128 vprod0x0123 = _mm_mul_ps(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__sse41_2x8()
63 const __m128 vacc0x0123 = _mm_blendv_ps(vi0x0123, vprod0x0123, vi0x0123); in xnn_f32_prelu_ukernel__sse41_2x8()
79 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_prelu_ukernel__sse41_2x8() local
84 const __m128 vprod0x0123 = _mm_mul_ps(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__sse41_2x8()
87 __m128 vacc0x0123 = _mm_blendv_ps(vi0x0123, vprod0x0123, vi0x0123); in xnn_f32_prelu_ukernel__sse41_2x8()
99 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_prelu_ukernel__sse41_2x8() local
104 const __m128 vprod0x0123 = _mm_mul_ps(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__sse41_2x8()
107 __m128 vacc0x0123 = _mm_blendv_ps(vi0x0123, vprod0x0123, vi0x0123); in xnn_f32_prelu_ukernel__sse41_2x8()
Dwasmsimd-minmax-2x16.c54 v128_t vi0x0123 = wasm_v128_load(i0); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16() local
65 v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16()
66 vi0x0123 = wasm_i32x4_min(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16()
82 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(vi0x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16()
106 v128_t vi0x0123 = wasm_v128_load(i0); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16() local
111 v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16()
112 vi0x0123 = wasm_i32x4_min(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16()
116 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(vi0x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16()
128 v128_t vi0x0123 = wasm_v128_load(i0); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16() local
133 v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16()
[all …]
Dsse-2x4.c51 __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_prelu_ukernel__sse_2x4() local
56 __m128 vacc0x0123 = _mm_max_ps(_mm_setzero_ps(), vi0x0123); in xnn_f32_prelu_ukernel__sse_2x4()
57 vi0x0123 = _mm_min_ps(vi0x0123, vzero); in xnn_f32_prelu_ukernel__sse_2x4()
61 vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(vi0x0123, vw0123)); in xnn_f32_prelu_ukernel__sse_2x4()
73 __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_prelu_ukernel__sse_2x4() local
78 __m128 vacc0x0123 = _mm_max_ps(_mm_setzero_ps(), vi0x0123); in xnn_f32_prelu_ukernel__sse_2x4()
79 vi0x0123 = _mm_min_ps(vi0x0123, vzero); in xnn_f32_prelu_ukernel__sse_2x4()
83 vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(vi0x0123, vw0123)); in xnn_f32_prelu_ukernel__sse_2x4()
Dwasmsimd-minmax-2x4.c51 v128_t vi0x0123 = wasm_v128_load(i0); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4() local
56 v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4()
57 vi0x0123 = wasm_i32x4_min(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4()
61 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(vi0x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4()
73 v128_t vi0x0123 = wasm_v128_load(i0); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4() local
78 v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4()
79 vi0x0123 = wasm_i32x4_min(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4()
83 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(vi0x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4()
Dneon-1x4.c43 const float32x4_t vi0x0123 = vld1q_f32(i0); i0 += 4; in xnn_f32_prelu_ukernel__neon_1x4() local
45 float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__neon_1x4()
46 const uint32x4_t vm0x0123 = vcltq_s32(vreinterpretq_s32_f32(vi0x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_1x4()
48 vacc0x0123 = vbslq_f32(vm0x0123, vacc0x0123, vi0x0123); in xnn_f32_prelu_ukernel__neon_1x4()
55 const float32x4_t vi0x0123 = vld1q_f32(i0); in xnn_f32_prelu_ukernel__neon_1x4() local
58 float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__neon_1x4()
59 const uint32x4_t vm0x0123 = vcltq_s32(vreinterpretq_s32_f32(vi0x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_1x4()
61 vacc0x0123 = vbslq_f32(vm0x0123, vacc0x0123, vi0x0123); in xnn_f32_prelu_ukernel__neon_1x4()
Dwasmsimd-bitselect-1x4.c45 const v128_t vi0x0123 = wasm_v128_load(i0); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x4() local
48 v128_t vacc0x0123 = wasm_f32x4_mul(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x4()
49 const v128_t vmask0x0123 = wasm_i32x4_lt(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x4()
51 vacc0x0123 = wasm_v128_bitselect(vacc0x0123, vi0x0123, vmask0x0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x4()
60 const v128_t vi0x0123 = wasm_v128_load(i0); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x4() local
63 v128_t vacc0x0123 = wasm_f32x4_mul(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x4()
64 const v128_t vmask0x0123 = wasm_i32x4_lt(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x4()
66 vacc0x0123 = wasm_v128_bitselect(vacc0x0123, vi0x0123, vmask0x0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x4()
Dwasmsimd-bitselect-2x8.c52 const v128_t vi0x0123 = wasm_v128_load(i0); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8() local
59 v128_t vacc0x0123 = wasm_f32x4_mul(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8()
60 const v128_t vmask0x0123 = wasm_i32x4_lt(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8()
68 vacc0x0123 = wasm_v128_bitselect(vacc0x0123, vi0x0123, vmask0x0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8()
84 const v128_t vi0x0123 = wasm_v128_load(i0); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8() local
89 v128_t vacc0x0123 = wasm_f32x4_mul(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8()
90 const v128_t vmask0x0123 = wasm_i32x4_lt(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8()
94 vacc0x0123 = wasm_v128_bitselect(vacc0x0123, vi0x0123, vmask0x0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8()
106 const v128_t vi0x0123 = wasm_v128_load(i0); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8() local
111 v128_t vacc0x0123 = wasm_f32x4_mul(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8()
[all …]
Dneon-2x8.c50 const float32x4_t vi0x0123 = vld1q_f32(i0); i0 += 4; in xnn_f32_prelu_ukernel__neon_2x8() local
55 float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__neon_2x8()
56 const uint32x4_t vm0x0123 = vcltq_s32(vreinterpretq_s32_f32(vi0x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_2x8()
64 vacc0x0123 = vbslq_f32(vm0x0123, vacc0x0123, vi0x0123); in xnn_f32_prelu_ukernel__neon_2x8()
77 const float32x4_t vi0x0123 = vld1q_f32(i0); in xnn_f32_prelu_ukernel__neon_2x8() local
82 float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__neon_2x8()
83 const uint32x4_t vm0x0123 = vcltq_s32(vreinterpretq_s32_f32(vi0x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_2x8()
87 vacc0x0123 = vbslq_f32(vm0x0123, vacc0x0123, vi0x0123); in xnn_f32_prelu_ukernel__neon_2x8()
96 const float32x4_t vi0x0123 = vld1q_f32(i0); in xnn_f32_prelu_ukernel__neon_2x8() local
101 float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__neon_2x8()
[all …]
Dsse2-2x8.c51 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_prelu_ukernel__sse2_2x8() local
58 const __m128 vprod0x0123 = _mm_mul_ps(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__sse2_2x8()
59 …8 vmask0x0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vi0x0123))); in xnn_f32_prelu_ukernel__sse2_2x8()
67 …vacc0x0123 = _mm_or_ps(_mm_and_ps(vprod0x0123, vmask0x0123), _mm_andnot_ps(vmask0x0123, vi0x0123)); in xnn_f32_prelu_ukernel__sse2_2x8()
83 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_prelu_ukernel__sse2_2x8() local
88 const __m128 vprod0x0123 = _mm_mul_ps(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__sse2_2x8()
89 …8 vmask0x0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vi0x0123))); in xnn_f32_prelu_ukernel__sse2_2x8()
93 …vacc0x0123 = _mm_or_ps(_mm_and_ps(vprod0x0123, vmask0x0123), _mm_andnot_ps(vmask0x0123, vi0x0123)); in xnn_f32_prelu_ukernel__sse2_2x8()
105 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_prelu_ukernel__sse2_2x8() local
110 const __m128 vprod0x0123 = _mm_mul_ps(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__sse2_2x8()
[all …]
Dwasmsimd-minmax-4x8.c64 v128_t vi0x0123 = wasm_v128_load(i0); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8() local
77 v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8()
78 vi0x0123 = wasm_i32x4_min(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8()
94 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(vi0x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8()
120 v128_t vi0x0123 = wasm_v128_load(i0); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8() local
129 v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8()
130 vi0x0123 = wasm_i32x4_min(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8()
138 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(vi0x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8()
156 v128_t vi0x0123 = wasm_v128_load(i0); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8() local
165 v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8()
[all …]
Dsse41-2x4.c50 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_prelu_ukernel__sse41_2x4() local
55 const __m128 vprod0x0123 = _mm_mul_ps(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__sse41_2x4()
58 const __m128 vacc0x0123 = _mm_blendv_ps(vi0x0123, vprod0x0123, vi0x0123); in xnn_f32_prelu_ukernel__sse41_2x4()
70 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_prelu_ukernel__sse41_2x4() local
75 const __m128 vprod0x0123 = _mm_mul_ps(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__sse41_2x4()
78 __m128 vacc0x0123 = _mm_blendv_ps(vi0x0123, vprod0x0123, vi0x0123); in xnn_f32_prelu_ukernel__sse41_2x4()
Dwasmsimd-bitselect-2x16.c54 const v128_t vi0x0123 = wasm_v128_load(i0); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16() local
65 v128_t vacc0x0123 = wasm_f32x4_mul(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16()
66 const v128_t vmask0x0123 = wasm_i32x4_lt(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16()
82 vacc0x0123 = wasm_v128_bitselect(vacc0x0123, vi0x0123, vmask0x0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16()
106 const v128_t vi0x0123 = wasm_v128_load(i0); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16() local
111 v128_t vacc0x0123 = wasm_f32x4_mul(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16()
112 const v128_t vmask0x0123 = wasm_i32x4_lt(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16()
116 vacc0x0123 = wasm_v128_bitselect(vacc0x0123, vi0x0123, vmask0x0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16()
128 const v128_t vi0x0123 = wasm_v128_load(i0); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16() local
133 v128_t vacc0x0123 = wasm_f32x4_mul(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16()
[all …]
Dwasmsimd-bitselect-2x4.c51 const v128_t vi0x0123 = wasm_v128_load(i0); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4() local
56 v128_t vacc0x0123 = wasm_f32x4_mul(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4()
57 const v128_t vmask0x0123 = wasm_i32x4_lt(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4()
61 vacc0x0123 = wasm_v128_bitselect(vacc0x0123, vi0x0123, vmask0x0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4()
73 const v128_t vi0x0123 = wasm_v128_load(i0); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4() local
78 v128_t vacc0x0123 = wasm_f32x4_mul(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4()
79 const v128_t vmask0x0123 = wasm_i32x4_lt(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4()
83 vacc0x0123 = wasm_v128_bitselect(vacc0x0123, vi0x0123, vmask0x0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4()
Dneon-2x4.c49 const float32x4_t vi0x0123 = vld1q_f32(i0); i0 += 4; in xnn_f32_prelu_ukernel__neon_2x4() local
52 float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__neon_2x4()
53 const uint32x4_t vm0x0123 = vcltq_s32(vreinterpretq_s32_f32(vi0x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_2x4()
57 vacc0x0123 = vbslq_f32(vm0x0123, vacc0x0123, vi0x0123); in xnn_f32_prelu_ukernel__neon_2x4()
66 const float32x4_t vi0x0123 = vld1q_f32(i0); in xnn_f32_prelu_ukernel__neon_2x4() local
71 float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__neon_2x4()
72 const uint32x4_t vm0x0123 = vcltq_s32(vreinterpretq_s32_f32(vi0x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_2x4()
76 vacc0x0123 = vbslq_f32(vm0x0123, vacc0x0123, vi0x0123); in xnn_f32_prelu_ukernel__neon_2x4()
Dneon-2x16.c52 const float32x4_t vi0x0123 = vld1q_f32(i0); i0 += 4; in xnn_f32_prelu_ukernel__neon_2x16() local
61 float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__neon_2x16()
62 const uint32x4_t vm0x0123 = vcltq_s32(vreinterpretq_s32_f32(vi0x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_2x16()
78 vacc0x0123 = vbslq_f32(vm0x0123, vacc0x0123, vi0x0123); in xnn_f32_prelu_ukernel__neon_2x16()
99 const float32x4_t vi0x0123 = vld1q_f32(i0); in xnn_f32_prelu_ukernel__neon_2x16() local
104 float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__neon_2x16()
105 const uint32x4_t vm0x0123 = vcltq_s32(vreinterpretq_s32_f32(vi0x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_2x16()
109 vacc0x0123 = vbslq_f32(vm0x0123, vacc0x0123, vi0x0123); in xnn_f32_prelu_ukernel__neon_2x16()
118 const float32x4_t vi0x0123 = vld1q_f32(i0); in xnn_f32_prelu_ukernel__neon_2x16() local
123 float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__neon_2x16()
[all …]
Dsse2-2x4.c50 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_prelu_ukernel__sse2_2x4() local
55 const __m128 vprod0x0123 = _mm_mul_ps(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__sse2_2x4()
56 …8 vmask0x0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vi0x0123))); in xnn_f32_prelu_ukernel__sse2_2x4()
60 …vacc0x0123 = _mm_or_ps(_mm_and_ps(vprod0x0123, vmask0x0123), _mm_andnot_ps(vmask0x0123, vi0x0123)); in xnn_f32_prelu_ukernel__sse2_2x4()
72 const __m128 vi0x0123 = _mm_loadu_ps(i0); in xnn_f32_prelu_ukernel__sse2_2x4() local
77 const __m128 vprod0x0123 = _mm_mul_ps(vi0x0123, vw0123); in xnn_f32_prelu_ukernel__sse2_2x4()
78 …8 vmask0x0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vi0x0123))); in xnn_f32_prelu_ukernel__sse2_2x4()
82 …vacc0x0123 = _mm_or_ps(_mm_and_ps(vprod0x0123, vmask0x0123), _mm_andnot_ps(vmask0x0123, vi0x0123)); in xnn_f32_prelu_ukernel__sse2_2x4()

Pages: 1 2 3 4 5 6 7 8 9 10