/external/XNNPACK/src/f32-prelu/gen/ |
D | wasmsimd-minmax-2x8.c | 55 v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8() local 63 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8() 64 vi1x0123 = wasm_i32x4_min(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8() 70 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(vi1x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8() 86 v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8() local 91 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8() 92 vi1x0123 = wasm_i32x4_min(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8() 95 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(vi1x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8() 108 v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8() local 113 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8() [all …]
|
D | sse-2x8.c | 55 __m128 vi1x0123 = _mm_loadu_ps(i1); in xnn_f32_prelu_ukernel__sse_2x8() local 63 __m128 vacc1x0123 = _mm_max_ps(_mm_setzero_ps(), vi1x0123); in xnn_f32_prelu_ukernel__sse_2x8() 64 vi1x0123 = _mm_min_ps(vi1x0123, vzero); in xnn_f32_prelu_ukernel__sse_2x8() 70 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(vi1x0123, vw0123)); in xnn_f32_prelu_ukernel__sse_2x8() 86 __m128 vi1x0123 = _mm_loadu_ps(i1); in xnn_f32_prelu_ukernel__sse_2x8() local 91 __m128 vacc1x0123 = _mm_max_ps(_mm_setzero_ps(), vi1x0123); in xnn_f32_prelu_ukernel__sse_2x8() 92 vi1x0123 = _mm_min_ps(vi1x0123, vzero); in xnn_f32_prelu_ukernel__sse_2x8() 95 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(vi1x0123, vw0123)); in xnn_f32_prelu_ukernel__sse_2x8() 108 __m128 vi1x0123 = _mm_loadu_ps(i1); in xnn_f32_prelu_ukernel__sse_2x8() local 113 __m128 vacc1x0123 = _mm_max_ps(_mm_setzero_ps(), vi1x0123); in xnn_f32_prelu_ukernel__sse_2x8() [all …]
|
D | sse41-2x8.c | 54 const __m128 vi1x0123 = _mm_loadu_ps(i1); in xnn_f32_prelu_ukernel__sse41_2x8() local 60 const __m128 vprod1x0123 = _mm_mul_ps(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__sse41_2x8() 65 const __m128 vacc1x0123 = _mm_blendv_ps(vi1x0123, vprod1x0123, vi1x0123); in xnn_f32_prelu_ukernel__sse41_2x8() 81 const __m128 vi1x0123 = _mm_loadu_ps(i1); in xnn_f32_prelu_ukernel__sse41_2x8() local 85 const __m128 vprod1x0123 = _mm_mul_ps(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__sse41_2x8() 88 __m128 vacc1x0123 = _mm_blendv_ps(vi1x0123, vprod1x0123, vi1x0123); in xnn_f32_prelu_ukernel__sse41_2x8() 101 const __m128 vi1x0123 = _mm_loadu_ps(i1); in xnn_f32_prelu_ukernel__sse41_2x8() local 105 const __m128 vprod1x0123 = _mm_mul_ps(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__sse41_2x8() 108 __m128 vacc1x0123 = _mm_blendv_ps(vi1x0123, vprod1x0123, vi1x0123); in xnn_f32_prelu_ukernel__sse41_2x8()
|
D | wasmsimd-minmax-2x16.c | 59 v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16() local 73 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16() 74 vi1x0123 = wasm_i32x4_min(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16() 86 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(vi1x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16() 108 v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16() local 113 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16() 114 vi1x0123 = wasm_i32x4_min(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16() 117 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(vi1x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16() 130 v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16() local 135 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16() [all …]
|
D | sse-2x4.c | 53 __m128 vi1x0123 = _mm_loadu_ps(i1); in xnn_f32_prelu_ukernel__sse_2x4() local 58 __m128 vacc1x0123 = _mm_max_ps(_mm_setzero_ps(), vi1x0123); in xnn_f32_prelu_ukernel__sse_2x4() 59 vi1x0123 = _mm_min_ps(vi1x0123, vzero); in xnn_f32_prelu_ukernel__sse_2x4() 62 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(vi1x0123, vw0123)); in xnn_f32_prelu_ukernel__sse_2x4() 75 __m128 vi1x0123 = _mm_loadu_ps(i1); in xnn_f32_prelu_ukernel__sse_2x4() local 80 __m128 vacc1x0123 = _mm_max_ps(_mm_setzero_ps(), vi1x0123); in xnn_f32_prelu_ukernel__sse_2x4() 81 vi1x0123 = _mm_min_ps(vi1x0123, vzero); in xnn_f32_prelu_ukernel__sse_2x4() 84 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(vi1x0123, vw0123)); in xnn_f32_prelu_ukernel__sse_2x4()
|
D | wasmsimd-minmax-2x4.c | 53 v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4() local 58 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4() 59 vi1x0123 = wasm_i32x4_min(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4() 62 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(vi1x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4() 75 v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4() local 80 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4() 81 vi1x0123 = wasm_i32x4_min(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4() 84 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(vi1x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4()
|
D | wasmsimd-bitselect-2x8.c | 55 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8() local 63 v128_t vacc1x0123 = wasm_f32x4_mul(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8() 64 const v128_t vmask1x0123 = wasm_i32x4_lt(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8() 70 vacc1x0123 = wasm_v128_bitselect(vacc1x0123, vi1x0123, vmask1x0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8() 86 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8() local 91 v128_t vacc1x0123 = wasm_f32x4_mul(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8() 92 const v128_t vmask1x0123 = wasm_i32x4_lt(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8() 95 vacc1x0123 = wasm_v128_bitselect(vacc1x0123, vi1x0123, vmask1x0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8() 108 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8() local 113 v128_t vacc1x0123 = wasm_f32x4_mul(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x8() [all …]
|
D | neon-2x8.c | 52 const float32x4_t vi1x0123 = vld1q_f32(i1); i1 += 4; in xnn_f32_prelu_ukernel__neon_2x8() local 59 float32x4_t vacc1x0123 = vmulq_f32(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__neon_2x8() 60 const uint32x4_t vm1x0123 = vcltq_s32(vreinterpretq_s32_f32(vi1x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_2x8() 66 vacc1x0123 = vbslq_f32(vm1x0123, vacc1x0123, vi1x0123); in xnn_f32_prelu_ukernel__neon_2x8() 79 const float32x4_t vi1x0123 = vld1q_f32(i1); in xnn_f32_prelu_ukernel__neon_2x8() local 84 float32x4_t vacc1x0123 = vmulq_f32(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__neon_2x8() 85 const uint32x4_t vm1x0123 = vcltq_s32(vreinterpretq_s32_f32(vi1x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_2x8() 88 vacc1x0123 = vbslq_f32(vm1x0123, vacc1x0123, vi1x0123); in xnn_f32_prelu_ukernel__neon_2x8() 98 const float32x4_t vi1x0123 = vld1q_f32(i1); in xnn_f32_prelu_ukernel__neon_2x8() local 103 float32x4_t vacc1x0123 = vmulq_f32(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__neon_2x8() [all …]
|
D | sse2-2x8.c | 54 const __m128 vi1x0123 = _mm_loadu_ps(i1); in xnn_f32_prelu_ukernel__sse2_2x8() local 62 const __m128 vprod1x0123 = _mm_mul_ps(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__sse2_2x8() 63 …8 vmask1x0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vi1x0123))); in xnn_f32_prelu_ukernel__sse2_2x8() 69 …vacc1x0123 = _mm_or_ps(_mm_and_ps(vprod1x0123, vmask1x0123), _mm_andnot_ps(vmask1x0123, vi1x0123)); in xnn_f32_prelu_ukernel__sse2_2x8() 85 const __m128 vi1x0123 = _mm_loadu_ps(i1); in xnn_f32_prelu_ukernel__sse2_2x8() local 90 const __m128 vprod1x0123 = _mm_mul_ps(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__sse2_2x8() 91 …8 vmask1x0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vi1x0123))); in xnn_f32_prelu_ukernel__sse2_2x8() 94 …vacc1x0123 = _mm_or_ps(_mm_and_ps(vprod1x0123, vmask1x0123), _mm_andnot_ps(vmask1x0123, vi1x0123)); in xnn_f32_prelu_ukernel__sse2_2x8() 107 const __m128 vi1x0123 = _mm_loadu_ps(i1); in xnn_f32_prelu_ukernel__sse2_2x8() local 112 const __m128 vprod1x0123 = _mm_mul_ps(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__sse2_2x8() [all …]
|
D | wasmsimd-minmax-4x8.c | 67 v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8() local 81 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8() 82 vi1x0123 = wasm_i32x4_min(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8() 96 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(vi1x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8() 122 v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8() local 131 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8() 132 vi1x0123 = wasm_i32x4_min(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8() 139 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(vi1x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8() 158 v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8() local 167 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8() [all …]
|
D | sse41-2x4.c | 52 const __m128 vi1x0123 = _mm_loadu_ps(i1); in xnn_f32_prelu_ukernel__sse41_2x4() local 56 const __m128 vprod1x0123 = _mm_mul_ps(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__sse41_2x4() 59 const __m128 vacc1x0123 = _mm_blendv_ps(vi1x0123, vprod1x0123, vi1x0123); in xnn_f32_prelu_ukernel__sse41_2x4() 72 const __m128 vi1x0123 = _mm_loadu_ps(i1); in xnn_f32_prelu_ukernel__sse41_2x4() local 76 const __m128 vprod1x0123 = _mm_mul_ps(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__sse41_2x4() 79 __m128 vacc1x0123 = _mm_blendv_ps(vi1x0123, vprod1x0123, vi1x0123); in xnn_f32_prelu_ukernel__sse41_2x4()
|
D | wasmsimd-bitselect-2x16.c | 59 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16() local 73 v128_t vacc1x0123 = wasm_f32x4_mul(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16() 74 const v128_t vmask1x0123 = wasm_i32x4_lt(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16() 86 vacc1x0123 = wasm_v128_bitselect(vacc1x0123, vi1x0123, vmask1x0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16() 108 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16() local 113 v128_t vacc1x0123 = wasm_f32x4_mul(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16() 114 const v128_t vmask1x0123 = wasm_i32x4_lt(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16() 117 vacc1x0123 = wasm_v128_bitselect(vacc1x0123, vi1x0123, vmask1x0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16() 130 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16() local 135 v128_t vacc1x0123 = wasm_f32x4_mul(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16() [all …]
|
D | wasmsimd-bitselect-2x4.c | 53 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4() local 58 v128_t vacc1x0123 = wasm_f32x4_mul(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4() 59 const v128_t vmask1x0123 = wasm_i32x4_lt(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4() 62 vacc1x0123 = wasm_v128_bitselect(vacc1x0123, vi1x0123, vmask1x0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4() 75 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4() local 80 v128_t vacc1x0123 = wasm_f32x4_mul(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4() 81 const v128_t vmask1x0123 = wasm_i32x4_lt(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4() 84 vacc1x0123 = wasm_v128_bitselect(vacc1x0123, vi1x0123, vmask1x0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x4()
|
D | neon-2x4.c | 50 const float32x4_t vi1x0123 = vld1q_f32(i1); i1 += 4; in xnn_f32_prelu_ukernel__neon_2x4() local 54 float32x4_t vacc1x0123 = vmulq_f32(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__neon_2x4() 55 const uint32x4_t vm1x0123 = vcltq_s32(vreinterpretq_s32_f32(vi1x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_2x4() 58 vacc1x0123 = vbslq_f32(vm1x0123, vacc1x0123, vi1x0123); in xnn_f32_prelu_ukernel__neon_2x4() 68 const float32x4_t vi1x0123 = vld1q_f32(i1); in xnn_f32_prelu_ukernel__neon_2x4() local 73 float32x4_t vacc1x0123 = vmulq_f32(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__neon_2x4() 74 const uint32x4_t vm1x0123 = vcltq_s32(vreinterpretq_s32_f32(vi1x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_2x4() 77 vacc1x0123 = vbslq_f32(vm1x0123, vacc1x0123, vi1x0123); in xnn_f32_prelu_ukernel__neon_2x4()
|
D | neon-2x16.c | 56 const float32x4_t vi1x0123 = vld1q_f32(i1); i1 += 4; in xnn_f32_prelu_ukernel__neon_2x16() local 69 float32x4_t vacc1x0123 = vmulq_f32(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__neon_2x16() 70 const uint32x4_t vm1x0123 = vcltq_s32(vreinterpretq_s32_f32(vi1x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_2x16() 82 vacc1x0123 = vbslq_f32(vm1x0123, vacc1x0123, vi1x0123); in xnn_f32_prelu_ukernel__neon_2x16() 101 const float32x4_t vi1x0123 = vld1q_f32(i1); in xnn_f32_prelu_ukernel__neon_2x16() local 106 float32x4_t vacc1x0123 = vmulq_f32(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__neon_2x16() 107 const uint32x4_t vm1x0123 = vcltq_s32(vreinterpretq_s32_f32(vi1x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_2x16() 110 vacc1x0123 = vbslq_f32(vm1x0123, vacc1x0123, vi1x0123); in xnn_f32_prelu_ukernel__neon_2x16() 120 const float32x4_t vi1x0123 = vld1q_f32(i1); in xnn_f32_prelu_ukernel__neon_2x16() local 125 float32x4_t vacc1x0123 = vmulq_f32(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__neon_2x16() [all …]
|
D | sse2-2x4.c | 52 const __m128 vi1x0123 = _mm_loadu_ps(i1); in xnn_f32_prelu_ukernel__sse2_2x4() local 57 const __m128 vprod1x0123 = _mm_mul_ps(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__sse2_2x4() 58 …8 vmask1x0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vi1x0123))); in xnn_f32_prelu_ukernel__sse2_2x4() 61 …vacc1x0123 = _mm_or_ps(_mm_and_ps(vprod1x0123, vmask1x0123), _mm_andnot_ps(vmask1x0123, vi1x0123)); in xnn_f32_prelu_ukernel__sse2_2x4() 74 const __m128 vi1x0123 = _mm_loadu_ps(i1); in xnn_f32_prelu_ukernel__sse2_2x4() local 79 const __m128 vprod1x0123 = _mm_mul_ps(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__sse2_2x4() 80 …8 vmask1x0123 = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vi1x0123))); in xnn_f32_prelu_ukernel__sse2_2x4() 83 …vacc1x0123 = _mm_or_ps(_mm_and_ps(vprod1x0123, vmask1x0123), _mm_andnot_ps(vmask1x0123, vi1x0123)); in xnn_f32_prelu_ukernel__sse2_2x4()
|
D | wasmsimd-minmax-4x4.c | 65 v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4() local 74 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4() 75 vi1x0123 = wasm_i32x4_min(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4() 82 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(vi1x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4() 101 v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4() local 110 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4() 111 vi1x0123 = wasm_i32x4_min(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4() 118 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(vi1x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4()
|
D | wasmsimd-bitselect-4x8.c | 67 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8() local 81 v128_t vacc1x0123 = wasm_f32x4_mul(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8() 82 const v128_t vmask1x0123 = wasm_i32x4_lt(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8() 96 vacc1x0123 = wasm_v128_bitselect(vacc1x0123, vi1x0123, vmask1x0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8() 122 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8() local 131 v128_t vacc1x0123 = wasm_f32x4_mul(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8() 132 const v128_t vmask1x0123 = wasm_i32x4_lt(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8() 139 vacc1x0123 = wasm_v128_bitselect(vacc1x0123, vi1x0123, vmask1x0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8() 158 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8() local 167 v128_t vacc1x0123 = wasm_f32x4_mul(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8() [all …]
|
D | wasmsimd-minmax-4x16.c | 71 v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16() local 95 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16() 96 vi1x0123 = wasm_i32x4_min(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16() 124 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(vi1x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16() 164 v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16() local 173 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16() 174 vi1x0123 = wasm_i32x4_min(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16() 181 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(vi1x0123, vw0123)); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16() 200 v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16() local 209 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16() [all …]
|
D | neon-4x8.c | 64 const float32x4_t vi1x0123 = vld1q_f32(i1); i1 += 4; in xnn_f32_prelu_ukernel__neon_4x8() local 75 float32x4_t vacc1x0123 = vmulq_f32(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__neon_4x8() 76 const uint32x4_t vm1x0123 = vcltq_s32(vreinterpretq_s32_f32(vi1x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_4x8() 90 vacc1x0123 = vbslq_f32(vm1x0123, vacc1x0123, vi1x0123); in xnn_f32_prelu_ukernel__neon_4x8() 111 const float32x4_t vi1x0123 = vld1q_f32(i1); in xnn_f32_prelu_ukernel__neon_4x8() local 120 float32x4_t vacc1x0123 = vmulq_f32(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__neon_4x8() 121 const uint32x4_t vm1x0123 = vcltq_s32(vreinterpretq_s32_f32(vi1x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_4x8() 128 vacc1x0123 = vbslq_f32(vm1x0123, vacc1x0123, vi1x0123); in xnn_f32_prelu_ukernel__neon_4x8() 142 const float32x4_t vi1x0123 = vld1q_f32(i1); in xnn_f32_prelu_ukernel__neon_4x8() local 151 float32x4_t vacc1x0123 = vmulq_f32(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__neon_4x8() [all …]
|
D | neon-4x4.c | 62 const float32x4_t vi1x0123 = vld1q_f32(i1); i1 += 4; in xnn_f32_prelu_ukernel__neon_4x4() local 68 float32x4_t vacc1x0123 = vmulq_f32(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__neon_4x4() 69 const uint32x4_t vm1x0123 = vcltq_s32(vreinterpretq_s32_f32(vi1x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_4x4() 76 vacc1x0123 = vbslq_f32(vm1x0123, vacc1x0123, vi1x0123); in xnn_f32_prelu_ukernel__neon_4x4() 90 const float32x4_t vi1x0123 = vld1q_f32(i1); in xnn_f32_prelu_ukernel__neon_4x4() local 99 float32x4_t vacc1x0123 = vmulq_f32(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__neon_4x4() 100 const uint32x4_t vm1x0123 = vcltq_s32(vreinterpretq_s32_f32(vi1x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_4x4() 107 vacc1x0123 = vbslq_f32(vm1x0123, vacc1x0123, vi1x0123); in xnn_f32_prelu_ukernel__neon_4x4()
|
D | wasmsimd-bitselect-4x4.c | 65 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x4() local 74 v128_t vacc1x0123 = wasm_f32x4_mul(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x4() 75 const v128_t vmask1x0123 = wasm_i32x4_lt(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x4() 82 vacc1x0123 = wasm_v128_bitselect(vacc1x0123, vi1x0123, vmask1x0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x4() 101 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x4() local 110 v128_t vacc1x0123 = wasm_f32x4_mul(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x4() 111 const v128_t vmask1x0123 = wasm_i32x4_lt(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x4() 118 vacc1x0123 = wasm_v128_bitselect(vacc1x0123, vi1x0123, vmask1x0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x4()
|
D | wasmsimd-bitselect-4x16.c | 71 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16() local 95 v128_t vacc1x0123 = wasm_f32x4_mul(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16() 96 const v128_t vmask1x0123 = wasm_i32x4_lt(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16() 124 vacc1x0123 = wasm_v128_bitselect(vacc1x0123, vi1x0123, vmask1x0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16() 164 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16() local 173 v128_t vacc1x0123 = wasm_f32x4_mul(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16() 174 const v128_t vmask1x0123 = wasm_i32x4_lt(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16() 181 vacc1x0123 = wasm_v128_bitselect(vacc1x0123, vi1x0123, vmask1x0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16() 200 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16() local 209 v128_t vacc1x0123 = wasm_f32x4_mul(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16() [all …]
|
D | neon-4x16.c | 68 const float32x4_t vi1x0123 = vld1q_f32(i1); i1 += 4; in xnn_f32_prelu_ukernel__neon_4x16() local 89 float32x4_t vacc1x0123 = vmulq_f32(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__neon_4x16() 90 const uint32x4_t vm1x0123 = vcltq_s32(vreinterpretq_s32_f32(vi1x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_4x16() 118 vacc1x0123 = vbslq_f32(vm1x0123, vacc1x0123, vi1x0123); in xnn_f32_prelu_ukernel__neon_4x16() 153 const float32x4_t vi1x0123 = vld1q_f32(i1); in xnn_f32_prelu_ukernel__neon_4x16() local 162 float32x4_t vacc1x0123 = vmulq_f32(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__neon_4x16() 163 const uint32x4_t vm1x0123 = vcltq_s32(vreinterpretq_s32_f32(vi1x0123), vmovq_n_s32(0)); in xnn_f32_prelu_ukernel__neon_4x16() 170 vacc1x0123 = vbslq_f32(vm1x0123, vacc1x0123, vi1x0123); in xnn_f32_prelu_ukernel__neon_4x16() 184 const float32x4_t vi1x0123 = vld1q_f32(i1); in xnn_f32_prelu_ukernel__neon_4x16() local 193 float32x4_t vacc1x0123 = vmulq_f32(vi1x0123, vw0123); in xnn_f32_prelu_ukernel__neon_4x16() [all …]
|
/external/XNNPACK/src/f32-dwconv/gen/ |
D | up8x4-wasmsimd.c | 71 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_dwconv_ukernel_up8x4__wasmsimd() local 77 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_ukernel_up8x4__wasmsimd() 117 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_dwconv_ukernel_up8x4__wasmsimd() local 121 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_ukernel_up8x4__wasmsimd() 150 const v128_t vi1x0123 = wasm_v128_load(i1); in xnn_f32_dwconv_ukernel_up8x4__wasmsimd() local 152 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_ukernel_up8x4__wasmsimd()
|