Home
last modified time | relevance | path

Searched refs:vi1x0123 (Results 1 – 25 of 238) sorted by relevance

12345678910

/external/XNNPACK/src/f32-prelu/gen/
Dwasmsimd-minmax-2x8.c55 v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8() local
63 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8()
64 vi1x0123 = wasm_i32x4_min(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8()
70 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(vi1x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8()
86 v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8() local
91 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8()
92 vi1x0123 = wasm_i32x4_min(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8()
95 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(vi1x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8()
108 v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8() local
113 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8()
[all …]
Dsse-2x8.c55 __m128 vi1x0123 = _mm_loadu_ps(i1); in xnn_f32_prelu_ukernel__sse_2x8() local
63 __m128 vacc1x0123 = _mm_max_ps(_mm_setzero_ps(), vi1x0123); in xnn_f32_prelu_ukernel__sse_2x8()
64 vi1x0123 = _mm_min_ps(vi1x0123, vzero); in xnn_f32_prelu_ukernel__sse_2x8()
70 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(vi1x0123, vw0123)); in xnn_f32_prelu_ukernel__sse_2x8()
86 __m128 vi1x0123 = _mm_loadu_ps(i1); in xnn_f32_prelu_ukernel__sse_2x8() local
91 __m128 vacc1x0123 = _mm_max_ps(_mm_setzero_ps(), vi1x0123); in xnn_f32_prelu_ukernel__sse_2x8()
92 vi1x0123 = _mm_min_ps(vi1x0123, vzero); in xnn_f32_prelu_ukernel__sse_2x8()
95 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(vi1x0123, vw0123)); in xnn_f32_prelu_ukernel__sse_2x8()
108 __m128 vi1x0123 = _mm_loadu_ps(i1); in xnn_f32_prelu_ukernel__sse_2x8() local
113 __m128 vacc1x0123 = _mm_max_ps(_mm_setzero_ps(), vi1x0123); in xnn_f32_prelu_ukernel__sse_2x8()
[all …]
Dsse41-2x8.c54 const __m128 vi1x0123 = _mm_loadu_ps(i1); in xnn_f32_prelu_ukernel__sse41_2x8() local
60 const __m128 vprod1x0123 = _mm_mul_ps(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__sse41_2x8()
65 const __m128 vacc1x0123 = _mm_blendv_ps(vi1x0123, vprod1x0123, vi1x0123); in xnn_f32_prelu_ukernel__sse41_2x8()
81 const __m128 vi1x0123 = _mm_loadu_ps(i1); in xnn_f32_prelu_ukernel__sse41_2x8() local
85 const __m128 vprod1x0123 = _mm_mul_ps(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__sse41_2x8()
88 __m128 vacc1x0123 = _mm_blendv_ps(vi1x0123, vprod1x0123, vi1x0123); in xnn_f32_prelu_ukernel__sse41_2x8()
101 const __m128 vi1x0123 = _mm_loadu_ps(i1); in xnn_f32_prelu_ukernel__sse41_2x8() local
105 const __m128 vprod1x0123 = _mm_mul_ps(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__sse41_2x8()
108 __m128 vacc1x0123 = _mm_blendv_ps(vi1x0123, vprod1x0123, vi1x0123); in xnn_f32_prelu_ukernel__sse41_2x8()
Dwasmsimd-minmax-2x16.c59 v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16() local
73 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16()
74 vi1x0123 = wasm_i32x4_min(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16()
86 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(vi1x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16()
108 v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16() local
113 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16()
114 vi1x0123 = wasm_i32x4_min(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16()
117 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(vi1x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16()
130 v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16() local
135 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16()
[all …]
Dsse-2x4.c53 __m128 vi1x0123 = _mm_loadu_ps(i1); in xnn_f32_prelu_ukernel__sse_2x4() local
58 __m128 vacc1x0123 = _mm_max_ps(_mm_setzero_ps(), vi1x0123); in xnn_f32_prelu_ukernel__sse_2x4()
59 vi1x0123 = _mm_min_ps(vi1x0123, vzero); in xnn_f32_prelu_ukernel__sse_2x4()
62 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(vi1x0123, vw0123)); in xnn_f32_prelu_ukernel__sse_2x4()
75 __m128 vi1x0123 = _mm_loadu_ps(i1); in xnn_f32_prelu_ukernel__sse_2x4() local
80 __m128 vacc1x0123 = _mm_max_ps(_mm_setzero_ps(), vi1x0123); in xnn_f32_prelu_ukernel__sse_2x4()
81 vi1x0123 = _mm_min_ps(vi1x0123, vzero); in xnn_f32_prelu_ukernel__sse_2x4()
84 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(vi1x0123, vw0123)); in xnn_f32_prelu_ukernel__sse_2x4()
Dwasmsimd-minmax-2x4.c53 v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4() local
58 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4()
59 vi1x0123 = wasm_i32x4_min(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4()
62 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(vi1x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4()
75 v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4() local
80 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4()
81 vi1x0123 = wasm_i32x4_min(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4()
84 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(vi1x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4()
Dwasmsimd-bitselect-2x8.c55 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8() local
63 v128_t vacc1x0123 = wasm_f32x4_mul(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8()
64 const v128_t vmask1x0123 = wasm_i32x4_lt(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8()
70 vacc1x0123 = wasm_v128_bitselect(vacc1x0123, vi1x0123, vmask1x0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8()
86 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8() local
91 v128_t vacc1x0123 = wasm_f32x4_mul(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8()
92 const v128_t vmask1x0123 = wasm_i32x4_lt(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8()
95 vacc1x0123 = wasm_v128_bitselect(vacc1x0123, vi1x0123, vmask1x0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8()
108 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8() local
113 v128_t vacc1x0123 = wasm_f32x4_mul(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8()
[all …]
Dneon-2x8.c52 const float32x4_t vi1x0123 = vld1q_f32(i1); i1 += 4; in xnn_f32_prelu_ukernel__neon_2x8() local
59 float32x4_t vacc1x0123 = vmulq_f32(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__neon_2x8()
60 const uint32x4_t vm1x0123 = vcltq_s32(vreinterpretq_s32_f32(vi1x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_2x8()
66 vacc1x0123 = vbslq_f32(vm1x0123, vacc1x0123, vi1x0123); in xnn_f32_prelu_ukernel__neon_2x8()
79 const float32x4_t vi1x0123 = vld1q_f32(i1); in xnn_f32_prelu_ukernel__neon_2x8() local
84 float32x4_t vacc1x0123 = vmulq_f32(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__neon_2x8()
85 const uint32x4_t vm1x0123 = vcltq_s32(vreinterpretq_s32_f32(vi1x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_2x8()
88 vacc1x0123 = vbslq_f32(vm1x0123, vacc1x0123, vi1x0123); in xnn_f32_prelu_ukernel__neon_2x8()
98 const float32x4_t vi1x0123 = vld1q_f32(i1); in xnn_f32_prelu_ukernel__neon_2x8() local
103 float32x4_t vacc1x0123 = vmulq_f32(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__neon_2x8()
[all …]
Dsse2-2x8.c54 const __m128 vi1x0123 = _mm_loadu_ps(i1); in xnn_f32_prelu_ukernel__sse2_2x8() local
62 const __m128 vprod1x0123 = _mm_mul_ps(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__sse2_2x8()
63 …8 vmask1x0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vi1x0123))); in xnn_f32_prelu_ukernel__sse2_2x8()
69 …vacc1x0123 = _mm_or_ps(_mm_and_ps(vprod1x0123, vmask1x0123), _mm_andnot_ps(vmask1x0123, vi1x0123)); in xnn_f32_prelu_ukernel__sse2_2x8()
85 const __m128 vi1x0123 = _mm_loadu_ps(i1); in xnn_f32_prelu_ukernel__sse2_2x8() local
90 const __m128 vprod1x0123 = _mm_mul_ps(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__sse2_2x8()
91 …8 vmask1x0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vi1x0123))); in xnn_f32_prelu_ukernel__sse2_2x8()
94 …vacc1x0123 = _mm_or_ps(_mm_and_ps(vprod1x0123, vmask1x0123), _mm_andnot_ps(vmask1x0123, vi1x0123)); in xnn_f32_prelu_ukernel__sse2_2x8()
107 const __m128 vi1x0123 = _mm_loadu_ps(i1); in xnn_f32_prelu_ukernel__sse2_2x8() local
112 const __m128 vprod1x0123 = _mm_mul_ps(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__sse2_2x8()
[all …]
Dwasmsimd-minmax-4x8.c67 v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8() local
81 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8()
82 vi1x0123 = wasm_i32x4_min(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8()
96 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(vi1x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8()
122 v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8() local
131 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8()
132 vi1x0123 = wasm_i32x4_min(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8()
139 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(vi1x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8()
158 v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8() local
167 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8()
[all …]
Dsse41-2x4.c52 const __m128 vi1x0123 = _mm_loadu_ps(i1); in xnn_f32_prelu_ukernel__sse41_2x4() local
56 const __m128 vprod1x0123 = _mm_mul_ps(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__sse41_2x4()
59 const __m128 vacc1x0123 = _mm_blendv_ps(vi1x0123, vprod1x0123, vi1x0123); in xnn_f32_prelu_ukernel__sse41_2x4()
72 const __m128 vi1x0123 = _mm_loadu_ps(i1); in xnn_f32_prelu_ukernel__sse41_2x4() local
76 const __m128 vprod1x0123 = _mm_mul_ps(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__sse41_2x4()
79 __m128 vacc1x0123 = _mm_blendv_ps(vi1x0123, vprod1x0123, vi1x0123); in xnn_f32_prelu_ukernel__sse41_2x4()
Dwasmsimd-bitselect-2x16.c59 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16() local
73 v128_t vacc1x0123 = wasm_f32x4_mul(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16()
74 const v128_t vmask1x0123 = wasm_i32x4_lt(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16()
86 vacc1x0123 = wasm_v128_bitselect(vacc1x0123, vi1x0123, vmask1x0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16()
108 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16() local
113 v128_t vacc1x0123 = wasm_f32x4_mul(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16()
114 const v128_t vmask1x0123 = wasm_i32x4_lt(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16()
117 vacc1x0123 = wasm_v128_bitselect(vacc1x0123, vi1x0123, vmask1x0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16()
130 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16() local
135 v128_t vacc1x0123 = wasm_f32x4_mul(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16()
[all …]
Dwasmsimd-bitselect-2x4.c53 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4() local
58 v128_t vacc1x0123 = wasm_f32x4_mul(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4()
59 const v128_t vmask1x0123 = wasm_i32x4_lt(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4()
62 vacc1x0123 = wasm_v128_bitselect(vacc1x0123, vi1x0123, vmask1x0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4()
75 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4() local
80 v128_t vacc1x0123 = wasm_f32x4_mul(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4()
81 const v128_t vmask1x0123 = wasm_i32x4_lt(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4()
84 vacc1x0123 = wasm_v128_bitselect(vacc1x0123, vi1x0123, vmask1x0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4()
Dneon-2x4.c50 const float32x4_t vi1x0123 = vld1q_f32(i1); i1 += 4; in xnn_f32_prelu_ukernel__neon_2x4() local
54 float32x4_t vacc1x0123 = vmulq_f32(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__neon_2x4()
55 const uint32x4_t vm1x0123 = vcltq_s32(vreinterpretq_s32_f32(vi1x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_2x4()
58 vacc1x0123 = vbslq_f32(vm1x0123, vacc1x0123, vi1x0123); in xnn_f32_prelu_ukernel__neon_2x4()
68 const float32x4_t vi1x0123 = vld1q_f32(i1); in xnn_f32_prelu_ukernel__neon_2x4() local
73 float32x4_t vacc1x0123 = vmulq_f32(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__neon_2x4()
74 const uint32x4_t vm1x0123 = vcltq_s32(vreinterpretq_s32_f32(vi1x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_2x4()
77 vacc1x0123 = vbslq_f32(vm1x0123, vacc1x0123, vi1x0123); in xnn_f32_prelu_ukernel__neon_2x4()
Dneon-2x16.c56 const float32x4_t vi1x0123 = vld1q_f32(i1); i1 += 4; in xnn_f32_prelu_ukernel__neon_2x16() local
69 float32x4_t vacc1x0123 = vmulq_f32(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__neon_2x16()
70 const uint32x4_t vm1x0123 = vcltq_s32(vreinterpretq_s32_f32(vi1x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_2x16()
82 vacc1x0123 = vbslq_f32(vm1x0123, vacc1x0123, vi1x0123); in xnn_f32_prelu_ukernel__neon_2x16()
101 const float32x4_t vi1x0123 = vld1q_f32(i1); in xnn_f32_prelu_ukernel__neon_2x16() local
106 float32x4_t vacc1x0123 = vmulq_f32(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__neon_2x16()
107 const uint32x4_t vm1x0123 = vcltq_s32(vreinterpretq_s32_f32(vi1x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_2x16()
110 vacc1x0123 = vbslq_f32(vm1x0123, vacc1x0123, vi1x0123); in xnn_f32_prelu_ukernel__neon_2x16()
120 const float32x4_t vi1x0123 = vld1q_f32(i1); in xnn_f32_prelu_ukernel__neon_2x16() local
125 float32x4_t vacc1x0123 = vmulq_f32(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__neon_2x16()
[all …]
Dsse2-2x4.c52 const __m128 vi1x0123 = _mm_loadu_ps(i1); in xnn_f32_prelu_ukernel__sse2_2x4() local
57 const __m128 vprod1x0123 = _mm_mul_ps(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__sse2_2x4()
58 …8 vmask1x0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vi1x0123))); in xnn_f32_prelu_ukernel__sse2_2x4()
61 …vacc1x0123 = _mm_or_ps(_mm_and_ps(vprod1x0123, vmask1x0123), _mm_andnot_ps(vmask1x0123, vi1x0123)); in xnn_f32_prelu_ukernel__sse2_2x4()
74 const __m128 vi1x0123 = _mm_loadu_ps(i1); in xnn_f32_prelu_ukernel__sse2_2x4() local
79 const __m128 vprod1x0123 = _mm_mul_ps(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__sse2_2x4()
80 …8 vmask1x0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vi1x0123))); in xnn_f32_prelu_ukernel__sse2_2x4()
83 …vacc1x0123 = _mm_or_ps(_mm_and_ps(vprod1x0123, vmask1x0123), _mm_andnot_ps(vmask1x0123, vi1x0123)); in xnn_f32_prelu_ukernel__sse2_2x4()
Dwasmsimd-minmax-4x4.c65 v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4() local
74 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4()
75 vi1x0123 = wasm_i32x4_min(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4()
82 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(vi1x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4()
101 v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4() local
110 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4()
111 vi1x0123 = wasm_i32x4_min(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4()
118 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(vi1x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4()
Dwasmsimd-bitselect-4x8.c67 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8() local
81 v128_t vacc1x0123 = wasm_f32x4_mul(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8()
82 const v128_t vmask1x0123 = wasm_i32x4_lt(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8()
96 vacc1x0123 = wasm_v128_bitselect(vacc1x0123, vi1x0123, vmask1x0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8()
122 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8() local
131 v128_t vacc1x0123 = wasm_f32x4_mul(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8()
132 const v128_t vmask1x0123 = wasm_i32x4_lt(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8()
139 vacc1x0123 = wasm_v128_bitselect(vacc1x0123, vi1x0123, vmask1x0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8()
158 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8() local
167 v128_t vacc1x0123 = wasm_f32x4_mul(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8()
[all …]
Dwasmsimd-minmax-4x16.c71 v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16() local
95 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16()
96 vi1x0123 = wasm_i32x4_min(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16()
124 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(vi1x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16()
164 v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16() local
173 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16()
174 vi1x0123 = wasm_i32x4_min(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16()
181 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(vi1x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16()
200 v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16() local
209 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16()
[all …]
Dneon-4x8.c64 const float32x4_t vi1x0123 = vld1q_f32(i1); i1 += 4; in xnn_f32_prelu_ukernel__neon_4x8() local
75 float32x4_t vacc1x0123 = vmulq_f32(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__neon_4x8()
76 const uint32x4_t vm1x0123 = vcltq_s32(vreinterpretq_s32_f32(vi1x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_4x8()
90 vacc1x0123 = vbslq_f32(vm1x0123, vacc1x0123, vi1x0123); in xnn_f32_prelu_ukernel__neon_4x8()
111 const float32x4_t vi1x0123 = vld1q_f32(i1); in xnn_f32_prelu_ukernel__neon_4x8() local
120 float32x4_t vacc1x0123 = vmulq_f32(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__neon_4x8()
121 const uint32x4_t vm1x0123 = vcltq_s32(vreinterpretq_s32_f32(vi1x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_4x8()
128 vacc1x0123 = vbslq_f32(vm1x0123, vacc1x0123, vi1x0123); in xnn_f32_prelu_ukernel__neon_4x8()
142 const float32x4_t vi1x0123 = vld1q_f32(i1); in xnn_f32_prelu_ukernel__neon_4x8() local
151 float32x4_t vacc1x0123 = vmulq_f32(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__neon_4x8()
[all …]
Dneon-4x4.c62 const float32x4_t vi1x0123 = vld1q_f32(i1); i1 += 4; in xnn_f32_prelu_ukernel__neon_4x4() local
68 float32x4_t vacc1x0123 = vmulq_f32(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__neon_4x4()
69 const uint32x4_t vm1x0123 = vcltq_s32(vreinterpretq_s32_f32(vi1x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_4x4()
76 vacc1x0123 = vbslq_f32(vm1x0123, vacc1x0123, vi1x0123); in xnn_f32_prelu_ukernel__neon_4x4()
90 const float32x4_t vi1x0123 = vld1q_f32(i1); in xnn_f32_prelu_ukernel__neon_4x4() local
99 float32x4_t vacc1x0123 = vmulq_f32(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__neon_4x4()
100 const uint32x4_t vm1x0123 = vcltq_s32(vreinterpretq_s32_f32(vi1x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_4x4()
107 vacc1x0123 = vbslq_f32(vm1x0123, vacc1x0123, vi1x0123); in xnn_f32_prelu_ukernel__neon_4x4()
Dwasmsimd-bitselect-4x4.c65 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x4() local
74 v128_t vacc1x0123 = wasm_f32x4_mul(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x4()
75 const v128_t vmask1x0123 = wasm_i32x4_lt(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x4()
82 vacc1x0123 = wasm_v128_bitselect(vacc1x0123, vi1x0123, vmask1x0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x4()
101 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x4() local
110 v128_t vacc1x0123 = wasm_f32x4_mul(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x4()
111 const v128_t vmask1x0123 = wasm_i32x4_lt(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x4()
118 vacc1x0123 = wasm_v128_bitselect(vacc1x0123, vi1x0123, vmask1x0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x4()
Dwasmsimd-bitselect-4x16.c71 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16() local
95 v128_t vacc1x0123 = wasm_f32x4_mul(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16()
96 const v128_t vmask1x0123 = wasm_i32x4_lt(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16()
124 vacc1x0123 = wasm_v128_bitselect(vacc1x0123, vi1x0123, vmask1x0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16()
164 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16() local
173 v128_t vacc1x0123 = wasm_f32x4_mul(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16()
174 const v128_t vmask1x0123 = wasm_i32x4_lt(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16()
181 vacc1x0123 = wasm_v128_bitselect(vacc1x0123, vi1x0123, vmask1x0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16()
200 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16() local
209 v128_t vacc1x0123 = wasm_f32x4_mul(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16()
[all …]
Dneon-4x16.c68 const float32x4_t vi1x0123 = vld1q_f32(i1); i1 += 4; in xnn_f32_prelu_ukernel__neon_4x16() local
89 float32x4_t vacc1x0123 = vmulq_f32(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__neon_4x16()
90 const uint32x4_t vm1x0123 = vcltq_s32(vreinterpretq_s32_f32(vi1x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_4x16()
118 vacc1x0123 = vbslq_f32(vm1x0123, vacc1x0123, vi1x0123); in xnn_f32_prelu_ukernel__neon_4x16()
153 const float32x4_t vi1x0123 = vld1q_f32(i1); in xnn_f32_prelu_ukernel__neon_4x16() local
162 float32x4_t vacc1x0123 = vmulq_f32(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__neon_4x16()
163 const uint32x4_t vm1x0123 = vcltq_s32(vreinterpretq_s32_f32(vi1x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_4x16()
170 vacc1x0123 = vbslq_f32(vm1x0123, vacc1x0123, vi1x0123); in xnn_f32_prelu_ukernel__neon_4x16()
184 const float32x4_t vi1x0123 = vld1q_f32(i1); in xnn_f32_prelu_ukernel__neon_4x16() local
193 float32x4_t vacc1x0123 = vmulq_f32(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__neon_4x16()
[all …]
/external/XNNPACK/src/f32-dwconv/gen/
Dup8x4-wasmsimd.c71 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_dwconv_ukernel_up8x4__wasmsimd() local
77 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_ukernel_up8x4__wasmsimd()
117 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_dwconv_ukernel_up8x4__wasmsimd() local
121 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_ukernel_up8x4__wasmsimd()
150 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_dwconv_ukernel_up8x4__wasmsimd() local
152 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_ukernel_up8x4__wasmsimd()

12345678910