/external/XNNPACK/src/f32-prelu/gen/
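Every kernel listed below computes the same elementwise operation, PReLU with a per-channel slope; only the vectorization strategy differs. As a scalar reference (our own distillation of what the vector code computes, not a function from the repository):

/* Scalar reference: positive inputs pass through unchanged,
   negative inputs are scaled by the per-channel weight w. */
static inline float prelu_scalar(float x, float w) {
  return x >= 0.0f ? x : x * w;
}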
wasmsimd-minmax-1x8.c (matches in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x8()):
    46  v128_t vi0x0123 = wasm_v128_load(i0);
    50  v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero);
    51  vi0x0123 = wasm_i32x4_min(vi0x0123, vzero);
    55  vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(vi0x0123, vw0123));
    66  v128_t vi0x0123 = wasm_v128_load(i0);
    69  v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero);
    70  vi0x0123 = wasm_i32x4_min(vi0x0123, vzero);
    72  vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(vi0x0123, vw0123));
    81  v128_t vi0x0123 = wasm_v128_load(i0);
    84  v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero);
    [all …]
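This and the other wasmsimd-minmax variants below share the same four-instruction core per vector of four floats: integer max/min against zero split the input into its positive and negative parts (comparing the raw bits with zero as a signed integer tests only the float's sign bit), and just the negative part is scaled by the slope. A minimal single-vector sketch, assuming a wasm32 target compiled with -msimd128; the helper name prelu_f32x4_minmax is ours, not XNNPACK's:

#include <wasm_simd128.h>

/* Integer max/min against zero select on the sign bit, splitting x into
   its positive part (negative lanes become +0) and its negative part
   (positive lanes become +0). Only the negative part is scaled by w. */
static v128_t prelu_f32x4_minmax(v128_t vx, v128_t vw) {
  const v128_t vzero = wasm_i32x4_splat(0);
  const v128_t vpos = wasm_i32x4_max(vx, vzero);
  const v128_t vneg = wasm_i32x4_min(vx, vzero);
  return wasm_f32x4_add(vpos, wasm_f32x4_mul(vneg, vw));
}

The 1x4/1x8/2x16/4x8 suffixes in the file names change only the tile shape, i.e. how many rows and channels one loop iteration covers; the per-vector math is identical.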
wasmsimd-minmax-1x16.c (matches in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16()):
    48  v128_t vi0x0123 = wasm_v128_load(i0);
    54  v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero);
    55  vi0x0123 = wasm_i32x4_min(vi0x0123, vzero);
    63  vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(vi0x0123, vw0123));
    78  v128_t vi0x0123 = wasm_v128_load(i0);
    81  v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero);
    82  vi0x0123 = wasm_i32x4_min(vi0x0123, vzero);
    84  vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(vi0x0123, vw0123));
    93  v128_t vi0x0123 = wasm_v128_load(i0);
    96  v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero);
    [all …]
wasmsimd-bitselect-1x8.c (matches in xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x8()):
    46  const v128_t vi0x0123 = wasm_v128_load(i0);
    50  v128_t vacc0x0123 = wasm_f32x4_mul(vi0x0123, vw0123);
    51  const v128_t vmask0x0123 = wasm_i32x4_lt(vi0x0123, vzero);
    55  vacc0x0123 = wasm_v128_bitselect(vacc0x0123, vi0x0123, vmask0x0123);
    66  const v128_t vi0x0123 = wasm_v128_load(i0);
    69  v128_t vacc0x0123 = wasm_f32x4_mul(vi0x0123, vw0123);
    70  const v128_t vmask0x0123 = wasm_i32x4_lt(vi0x0123, vzero);
    72  vacc0x0123 = wasm_v128_bitselect(vacc0x0123, vi0x0123, vmask0x0123);
    81  const v128_t vi0x0123 = wasm_v128_load(i0);
    84  v128_t vacc0x0123 = wasm_f32x4_mul(vi0x0123, vw0123);
    [all …]
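The bitselect variants take the complementary route: multiply by the slope unconditionally, build a full-width lane mask where the input is negative, and bit-select between product and input. A single-vector sketch under the same wasm32/-msimd128 assumptions (helper name ours):

#include <wasm_simd128.h>

/* Compute x*w in every lane, then keep it only in lanes whose sign bit
   is set; all other lanes pass the input through unchanged. */
static v128_t prelu_f32x4_bitselect(v128_t vx, v128_t vw) {
  const v128_t vzero = wasm_i32x4_splat(0);
  const v128_t vprod = wasm_f32x4_mul(vx, vw);
  const v128_t vmask = wasm_i32x4_lt(vx, vzero);  /* all-ones where x < 0 */
  return wasm_v128_bitselect(vprod, vx, vmask);   /* mask ? x*w : x */
}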
neon-1x8.c (matches in xnn_f32_prelu_ukernel__neon_1x8()):
    44  const float32x4_t vi0x0123 = vld1q_f32(i0); i0 += 4;
    47  float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123);
    48  const uint32x4_t vm0x0123 = vcltq_s32(vreinterpretq_s32_f32(vi0x0123), vmovq_n_s32(0));
    52  vacc0x0123 = vbslq_f32(vm0x0123, vacc0x0123, vi0x0123);
    61  const float32x4_t vi0x0123 = vld1q_f32(i0);
    64  float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123);
    65  const uint32x4_t vm0x0123 = vcltq_s32(vreinterpretq_s32_f32(vi0x0123), vmovq_n_s32(0));
    67  vacc0x0123 = vbslq_f32(vm0x0123, vacc0x0123, vi0x0123);
    74  const float32x4_t vi0x0123 = vld1q_f32(i0);
    77  float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123);
    [all …]
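The NEON kernels are the same select scheme expressed with vcltq_s32 and vbslq_f32; the signed-integer compare of the reinterpreted bits against zero is again just a sign-bit test. Sketch (helper name ours):

#include <arm_neon.h>

static float32x4_t prelu_f32x4_neon(float32x4_t vx, float32x4_t vw) {
  const float32x4_t vprod = vmulq_f32(vx, vw);
  /* Reinterpret the floats as s32 and compare against 0: true exactly
     when the sign bit is set. */
  const uint32x4_t vm = vcltq_s32(vreinterpretq_s32_f32(vx), vmovq_n_s32(0));
  return vbslq_f32(vm, vprod, vx);  /* negative lanes take x*w */
}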
wasmsimd-minmax-1x4.c (matches in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x4()):
    45  v128_t vi0x0123 = wasm_v128_load(i0);
    48  v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero);
    49  vi0x0123 = wasm_i32x4_min(vi0x0123, vzero);
    51  vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(vi0x0123, vw0123));
    60  v128_t vi0x0123 = wasm_v128_load(i0);
    63  v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero);
    64  vi0x0123 = wasm_i32x4_min(vi0x0123, vzero);
    66  vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(vi0x0123, vw0123));
wasmsimd-minmax-2x8.c (matches in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8()):
    52  v128_t vi0x0123 = wasm_v128_load(i0);
    59  v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero);
    60  vi0x0123 = wasm_i32x4_min(vi0x0123, vzero);
    68  vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(vi0x0123, vw0123));
    84  v128_t vi0x0123 = wasm_v128_load(i0);
    89  v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero);
    90  vi0x0123 = wasm_i32x4_min(vi0x0123, vzero);
    94  vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(vi0x0123, vw0123));
   106  v128_t vi0x0123 = wasm_v128_load(i0);
   111  v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero);
    [all …]
sse-2x8.c (matches in xnn_f32_prelu_ukernel__sse_2x8()):
    52  __m128 vi0x0123 = _mm_loadu_ps(i0);
    59  __m128 vacc0x0123 = _mm_max_ps(_mm_setzero_ps(), vi0x0123);
    60  vi0x0123 = _mm_min_ps(vi0x0123, vzero);
    68  vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(vi0x0123, vw0123));
    84  __m128 vi0x0123 = _mm_loadu_ps(i0);
    89  __m128 vacc0x0123 = _mm_max_ps(_mm_setzero_ps(), vi0x0123);
    90  vi0x0123 = _mm_min_ps(vi0x0123, vzero);
    94  vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(vi0x0123, vw0123));
   106  __m128 vi0x0123 = _mm_loadu_ps(i0);
   111  __m128 vacc0x0123 = _mm_max_ps(_mm_setzero_ps(), vi0x0123);
    [all …]
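The plain SSE kernels use the same positive/negative split as the wasmsimd-minmax variants, but with floating-point max/min, since baseline SSE has no packed integer operations. Sketch (helper name ours):

#include <xmmintrin.h>

/* Split x into its positive and negative parts with float max/min
   against zero, then scale only the negative part by the slope. */
static __m128 prelu_f32x4_sse(__m128 vx, __m128 vw) {
  const __m128 vzero = _mm_setzero_ps();
  const __m128 vpos = _mm_max_ps(vzero, vx);  /* positive part */
  const __m128 vneg = _mm_min_ps(vx, vzero);  /* negative part */
  return _mm_add_ps(vpos, _mm_mul_ps(vneg, vw));
}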
wasmsimd-bitselect-1x16.c (matches in xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x16()):
    48  const v128_t vi0x0123 = wasm_v128_load(i0);
    54  v128_t vacc0x0123 = wasm_f32x4_mul(vi0x0123, vw0123);
    55  const v128_t vmask0x0123 = wasm_i32x4_lt(vi0x0123, vzero);
    63  vacc0x0123 = wasm_v128_bitselect(vacc0x0123, vi0x0123, vmask0x0123);
    78  const v128_t vi0x0123 = wasm_v128_load(i0);
    81  v128_t vacc0x0123 = wasm_f32x4_mul(vi0x0123, vw0123);
    82  const v128_t vmask0x0123 = wasm_i32x4_lt(vi0x0123, vzero);
    84  vacc0x0123 = wasm_v128_bitselect(vacc0x0123, vi0x0123, vmask0x0123);
    93  const v128_t vi0x0123 = wasm_v128_load(i0);
    96  v128_t vacc0x0123 = wasm_f32x4_mul(vi0x0123, vw0123);
    [all …]
neon-1x16.c (matches in xnn_f32_prelu_ukernel__neon_1x16()):
    46  const float32x4_t vi0x0123 = vld1q_f32(i0); i0 += 4;
    51  float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123);
    52  const uint32x4_t vm0x0123 = vcltq_s32(vreinterpretq_s32_f32(vi0x0123), vmovq_n_s32(0));
    60  vacc0x0123 = vbslq_f32(vm0x0123, vacc0x0123, vi0x0123);
    73  const float32x4_t vi0x0123 = vld1q_f32(i0);
    76  float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123);
    77  const uint32x4_t vm0x0123 = vcltq_s32(vreinterpretq_s32_f32(vi0x0123), vmovq_n_s32(0));
    79  vacc0x0123 = vbslq_f32(vm0x0123, vacc0x0123, vi0x0123);
    86  const float32x4_t vi0x0123 = vld1q_f32(i0);
    89  float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123);
    [all …]
sse41-2x8.c (matches in xnn_f32_prelu_ukernel__sse41_2x8()):
    51  const __m128 vi0x0123 = _mm_loadu_ps(i0);
    58  const __m128 vprod0x0123 = _mm_mul_ps(vi0x0123, vw0123);
    63  const __m128 vacc0x0123 = _mm_blendv_ps(vi0x0123, vprod0x0123, vi0x0123);
    79  const __m128 vi0x0123 = _mm_loadu_ps(i0);
    84  const __m128 vprod0x0123 = _mm_mul_ps(vi0x0123, vw0123);
    87  __m128 vacc0x0123 = _mm_blendv_ps(vi0x0123, vprod0x0123, vi0x0123);
    99  const __m128 vi0x0123 = _mm_loadu_ps(i0);
   104  const __m128 vprod0x0123 = _mm_mul_ps(vi0x0123, vw0123);
   107  __m128 vacc0x0123 = _mm_blendv_ps(vi0x0123, vprod0x0123, vi0x0123);
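The sse41 kernels exploit a detail of _mm_blendv_ps: the blend is controlled only by the sign bit of each mask lane, so the input vector can serve as its own mask and no compare instruction is needed. Sketch (helper name ours):

#include <smmintrin.h>

static __m128 prelu_f32x4_sse41(__m128 vx, __m128 vw) {
  const __m128 vprod = _mm_mul_ps(vx, vw);
  /* blendv picks the second source where the mask lane's sign bit is
     set, so passing vx itself as the mask selects x*w exactly for x < 0. */
  return _mm_blendv_ps(vx, vprod, vx);
}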
wasmsimd-minmax-2x16.c (matches in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16()):
    54  v128_t vi0x0123 = wasm_v128_load(i0);
    65  v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero);
    66  vi0x0123 = wasm_i32x4_min(vi0x0123, vzero);
    82  vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(vi0x0123, vw0123));
   106  v128_t vi0x0123 = wasm_v128_load(i0);
   111  v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero);
   112  vi0x0123 = wasm_i32x4_min(vi0x0123, vzero);
   116  vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(vi0x0123, vw0123));
   128  v128_t vi0x0123 = wasm_v128_load(i0);
   133  v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero);
    [all …]
sse-2x4.c (matches in xnn_f32_prelu_ukernel__sse_2x4()):
    51  __m128 vi0x0123 = _mm_loadu_ps(i0);
    56  __m128 vacc0x0123 = _mm_max_ps(_mm_setzero_ps(), vi0x0123);
    57  vi0x0123 = _mm_min_ps(vi0x0123, vzero);
    61  vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(vi0x0123, vw0123));
    73  __m128 vi0x0123 = _mm_loadu_ps(i0);
    78  __m128 vacc0x0123 = _mm_max_ps(_mm_setzero_ps(), vi0x0123);
    79  vi0x0123 = _mm_min_ps(vi0x0123, vzero);
    83  vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(vi0x0123, vw0123));
wasmsimd-minmax-2x4.c (matches in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4()):
    51  v128_t vi0x0123 = wasm_v128_load(i0);
    56  v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero);
    57  vi0x0123 = wasm_i32x4_min(vi0x0123, vzero);
    61  vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(vi0x0123, vw0123));
    73  v128_t vi0x0123 = wasm_v128_load(i0);
    78  v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero);
    79  vi0x0123 = wasm_i32x4_min(vi0x0123, vzero);
    83  vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(vi0x0123, vw0123));
neon-1x4.c (matches in xnn_f32_prelu_ukernel__neon_1x4()):
    43  const float32x4_t vi0x0123 = vld1q_f32(i0); i0 += 4;
    45  float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123);
    46  const uint32x4_t vm0x0123 = vcltq_s32(vreinterpretq_s32_f32(vi0x0123), vmovq_n_s32(0));
    48  vacc0x0123 = vbslq_f32(vm0x0123, vacc0x0123, vi0x0123);
    55  const float32x4_t vi0x0123 = vld1q_f32(i0);
    58  float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123);
    59  const uint32x4_t vm0x0123 = vcltq_s32(vreinterpretq_s32_f32(vi0x0123), vmovq_n_s32(0));
    61  vacc0x0123 = vbslq_f32(vm0x0123, vacc0x0123, vi0x0123);
wasmsimd-bitselect-1x4.c (matches in xnn_f32_prelu_ukernel__wasmsimd_bitselect_1x4()):
    45  const v128_t vi0x0123 = wasm_v128_load(i0);
    48  v128_t vacc0x0123 = wasm_f32x4_mul(vi0x0123, vw0123);
    49  const v128_t vmask0x0123 = wasm_i32x4_lt(vi0x0123, vzero);
    51  vacc0x0123 = wasm_v128_bitselect(vacc0x0123, vi0x0123, vmask0x0123);
    60  const v128_t vi0x0123 = wasm_v128_load(i0);
    63  v128_t vacc0x0123 = wasm_f32x4_mul(vi0x0123, vw0123);
    64  const v128_t vmask0x0123 = wasm_i32x4_lt(vi0x0123, vzero);
    66  vacc0x0123 = wasm_v128_bitselect(vacc0x0123, vi0x0123, vmask0x0123);
wasmsimd-bitselect-2x8.c (matches in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8()):
    52  const v128_t vi0x0123 = wasm_v128_load(i0);
    59  v128_t vacc0x0123 = wasm_f32x4_mul(vi0x0123, vw0123);
    60  const v128_t vmask0x0123 = wasm_i32x4_lt(vi0x0123, vzero);
    68  vacc0x0123 = wasm_v128_bitselect(vacc0x0123, vi0x0123, vmask0x0123);
    84  const v128_t vi0x0123 = wasm_v128_load(i0);
    89  v128_t vacc0x0123 = wasm_f32x4_mul(vi0x0123, vw0123);
    90  const v128_t vmask0x0123 = wasm_i32x4_lt(vi0x0123, vzero);
    94  vacc0x0123 = wasm_v128_bitselect(vacc0x0123, vi0x0123, vmask0x0123);
   106  const v128_t vi0x0123 = wasm_v128_load(i0);
   111  v128_t vacc0x0123 = wasm_f32x4_mul(vi0x0123, vw0123);
    [all …]
neon-2x8.c (matches in xnn_f32_prelu_ukernel__neon_2x8()):
    50  const float32x4_t vi0x0123 = vld1q_f32(i0); i0 += 4;
    55  float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123);
    56  const uint32x4_t vm0x0123 = vcltq_s32(vreinterpretq_s32_f32(vi0x0123), vmovq_n_s32(0));
    64  vacc0x0123 = vbslq_f32(vm0x0123, vacc0x0123, vi0x0123);
    77  const float32x4_t vi0x0123 = vld1q_f32(i0);
    82  float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123);
    83  const uint32x4_t vm0x0123 = vcltq_s32(vreinterpretq_s32_f32(vi0x0123), vmovq_n_s32(0));
    87  vacc0x0123 = vbslq_f32(vm0x0123, vacc0x0123, vi0x0123);
    96  const float32x4_t vi0x0123 = vld1q_f32(i0);
   101  float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123);
    [all …]
sse2-2x8.c (matches in xnn_f32_prelu_ukernel__sse2_2x8()):
    51  const __m128 vi0x0123 = _mm_loadu_ps(i0);
    58  const __m128 vprod0x0123 = _mm_mul_ps(vi0x0123, vw0123);
    59  const __m128 vmask0x0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vi0x0123)));
    67  __m128 vacc0x0123 = _mm_or_ps(_mm_and_ps(vprod0x0123, vmask0x0123), _mm_andnot_ps(vmask0x0123, vi0x0123));
    83  const __m128 vi0x0123 = _mm_loadu_ps(i0);
    88  const __m128 vprod0x0123 = _mm_mul_ps(vi0x0123, vw0123);
    89  const __m128 vmask0x0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vi0x0123)));
    93  __m128 vacc0x0123 = _mm_or_ps(_mm_and_ps(vprod0x0123, vmask0x0123), _mm_andnot_ps(vmask0x0123, vi0x0123));
   105  const __m128 vi0x0123 = _mm_loadu_ps(i0);
   110  const __m128 vprod0x0123 = _mm_mul_ps(vi0x0123, vw0123);
    [all …]
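Without blendv, the sse2 kernels synthesize the select: a packed integer compare builds the sign mask, and an and/andnot/or triple performs the blend. Sketch (helper name ours):

#include <emmintrin.h>

static __m128 prelu_f32x4_sse2(__m128 vx, __m128 vw) {
  const __m128 vprod = _mm_mul_ps(vx, vw);
  /* 0 > (int)bits(x) holds exactly when the sign bit of x is set. */
  const __m128 vmask = _mm_castsi128_ps(
      _mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vx)));
  /* (x*w & mask) | (x & ~mask): the classic pre-SSE4.1 blend. */
  return _mm_or_ps(_mm_and_ps(vprod, vmask), _mm_andnot_ps(vmask, vx));
}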
wasmsimd-minmax-4x8.c (matches in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8()):
    64  v128_t vi0x0123 = wasm_v128_load(i0);
    77  v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero);
    78  vi0x0123 = wasm_i32x4_min(vi0x0123, vzero);
    94  vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(vi0x0123, vw0123));
   120  v128_t vi0x0123 = wasm_v128_load(i0);
   129  v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero);
   130  vi0x0123 = wasm_i32x4_min(vi0x0123, vzero);
   138  vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(vi0x0123, vw0123));
   156  v128_t vi0x0123 = wasm_v128_load(i0);
   165  v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero);
    [all …]
sse41-2x4.c (matches in xnn_f32_prelu_ukernel__sse41_2x4()):
    50  const __m128 vi0x0123 = _mm_loadu_ps(i0);
    55  const __m128 vprod0x0123 = _mm_mul_ps(vi0x0123, vw0123);
    58  const __m128 vacc0x0123 = _mm_blendv_ps(vi0x0123, vprod0x0123, vi0x0123);
    70  const __m128 vi0x0123 = _mm_loadu_ps(i0);
    75  const __m128 vprod0x0123 = _mm_mul_ps(vi0x0123, vw0123);
    78  __m128 vacc0x0123 = _mm_blendv_ps(vi0x0123, vprod0x0123, vi0x0123);
wasmsimd-bitselect-2x16.c (matches in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16()):
    54  const v128_t vi0x0123 = wasm_v128_load(i0);
    65  v128_t vacc0x0123 = wasm_f32x4_mul(vi0x0123, vw0123);
    66  const v128_t vmask0x0123 = wasm_i32x4_lt(vi0x0123, vzero);
    82  vacc0x0123 = wasm_v128_bitselect(vacc0x0123, vi0x0123, vmask0x0123);
   106  const v128_t vi0x0123 = wasm_v128_load(i0);
   111  v128_t vacc0x0123 = wasm_f32x4_mul(vi0x0123, vw0123);
   112  const v128_t vmask0x0123 = wasm_i32x4_lt(vi0x0123, vzero);
   116  vacc0x0123 = wasm_v128_bitselect(vacc0x0123, vi0x0123, vmask0x0123);
   128  const v128_t vi0x0123 = wasm_v128_load(i0);
   133  v128_t vacc0x0123 = wasm_f32x4_mul(vi0x0123, vw0123);
    [all …]
wasmsimd-bitselect-2x4.c (matches in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4()):
    51  const v128_t vi0x0123 = wasm_v128_load(i0);
    56  v128_t vacc0x0123 = wasm_f32x4_mul(vi0x0123, vw0123);
    57  const v128_t vmask0x0123 = wasm_i32x4_lt(vi0x0123, vzero);
    61  vacc0x0123 = wasm_v128_bitselect(vacc0x0123, vi0x0123, vmask0x0123);
    73  const v128_t vi0x0123 = wasm_v128_load(i0);
    78  v128_t vacc0x0123 = wasm_f32x4_mul(vi0x0123, vw0123);
    79  const v128_t vmask0x0123 = wasm_i32x4_lt(vi0x0123, vzero);
    83  vacc0x0123 = wasm_v128_bitselect(vacc0x0123, vi0x0123, vmask0x0123);
neon-2x4.c (matches in xnn_f32_prelu_ukernel__neon_2x4()):
    49  const float32x4_t vi0x0123 = vld1q_f32(i0); i0 += 4;
    52  float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123);
    53  const uint32x4_t vm0x0123 = vcltq_s32(vreinterpretq_s32_f32(vi0x0123), vmovq_n_s32(0));
    57  vacc0x0123 = vbslq_f32(vm0x0123, vacc0x0123, vi0x0123);
    66  const float32x4_t vi0x0123 = vld1q_f32(i0);
    71  float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123);
    72  const uint32x4_t vm0x0123 = vcltq_s32(vreinterpretq_s32_f32(vi0x0123), vmovq_n_s32(0));
    76  vacc0x0123 = vbslq_f32(vm0x0123, vacc0x0123, vi0x0123);
neon-2x16.c (matches in xnn_f32_prelu_ukernel__neon_2x16()):
    52  const float32x4_t vi0x0123 = vld1q_f32(i0); i0 += 4;
    61  float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123);
    62  const uint32x4_t vm0x0123 = vcltq_s32(vreinterpretq_s32_f32(vi0x0123), vmovq_n_s32(0));
    78  vacc0x0123 = vbslq_f32(vm0x0123, vacc0x0123, vi0x0123);
    99  const float32x4_t vi0x0123 = vld1q_f32(i0);
   104  float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123);
   105  const uint32x4_t vm0x0123 = vcltq_s32(vreinterpretq_s32_f32(vi0x0123), vmovq_n_s32(0));
   109  vacc0x0123 = vbslq_f32(vm0x0123, vacc0x0123, vi0x0123);
   118  const float32x4_t vi0x0123 = vld1q_f32(i0);
   123  float32x4_t vacc0x0123 = vmulq_f32(vi0x0123, vw0123);
    [all …]
sse2-2x4.c (matches in xnn_f32_prelu_ukernel__sse2_2x4()):
    50  const __m128 vi0x0123 = _mm_loadu_ps(i0);
    55  const __m128 vprod0x0123 = _mm_mul_ps(vi0x0123, vw0123);
    56  const __m128 vmask0x0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vi0x0123)));
    60  __m128 vacc0x0123 = _mm_or_ps(_mm_and_ps(vprod0x0123, vmask0x0123), _mm_andnot_ps(vmask0x0123, vi0x0123));
    72  const __m128 vi0x0123 = _mm_loadu_ps(i0);
    77  const __m128 vprod0x0123 = _mm_mul_ps(vi0x0123, vw0123);
    78  const __m128 vmask0x0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vi0x0123)));
    82  __m128 vacc0x0123 = _mm_or_ps(_mm_and_ps(vprod0x0123, vmask0x0123), _mm_andnot_ps(vmask0x0123, vi0x0123));