/external/XNNPACK/src/f32-dwconv/gen/ |
D | up8x3-wasmrelaxedsimd-fma.c | 70 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_ukernel_up8x3__wasmrelaxedsimd_fma() local 72 vacc0123p0 = __builtin_wasm_fma_f32x4(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up8x3__wasmrelaxedsimd_fma() 106 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_ukernel_up8x3__wasmrelaxedsimd_fma() local 107 vacc0123p0 = __builtin_wasm_fma_f32x4(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up8x3__wasmrelaxedsimd_fma() 131 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_ukernel_up8x3__wasmrelaxedsimd_fma() local 132 vacc0123p0 = __builtin_wasm_fma_f32x4(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up8x3__wasmrelaxedsimd_fma()
|
D | up8x3-wasmsimd.c | 70 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_ukernel_up8x3__wasmsimd() local 72 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_ukernel_up8x3__wasmsimd() 106 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_ukernel_up8x3__wasmsimd() local 107 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_ukernel_up8x3__wasmsimd() 131 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_ukernel_up8x3__wasmsimd() local 132 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_ukernel_up8x3__wasmsimd()
|
D | up8x3-minmax-wasmrelaxedsimd-fma.c | 72 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmrelaxedsimd_fma() local 74 vacc0123p0 = __builtin_wasm_fma_f32x4(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmrelaxedsimd_fma() 111 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmrelaxedsimd_fma() local 112 vacc0123p0 = __builtin_wasm_fma_f32x4(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmrelaxedsimd_fma() 137 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmrelaxedsimd_fma() local 138 vacc0123p0 = __builtin_wasm_fma_f32x4(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmrelaxedsimd_fma()
|
D | up8x3-minmax-wasmsimd-x86-acc2.c | 72 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmsimd_x86_acc2() local 74 v128_t vacc0123p1 = wasm_f32x4_mul(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmsimd_x86_acc2() 114 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmsimd_x86_acc2() local 115 v128_t vacc0123p1 = wasm_f32x4_mul(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmsimd_x86_acc2() 142 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmsimd_x86_acc2() local 143 v128_t vacc0123p1 = wasm_f32x4_mul(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmsimd_x86_acc2()
|
D | up8x3-minmax-wasmrelaxedsimd-fma-acc2.c | 72 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmrelaxedsimd_fma_acc2() local 74 v128_t vacc0123p1 = wasm_f32x4_mul(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmrelaxedsimd_fma_acc2() 114 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmrelaxedsimd_fma_acc2() local 115 v128_t vacc0123p1 = wasm_f32x4_mul(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmrelaxedsimd_fma_acc2() 142 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmrelaxedsimd_fma_acc2() local 143 v128_t vacc0123p1 = wasm_f32x4_mul(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmrelaxedsimd_fma_acc2()
|
D | up8x3-minmax-sse.c | 72 const __m128 vk1x0123 = _mm_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x3__sse() local 74 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x3__sse() 110 const __m128 vk1x0123 = _mm_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x3__sse() local 111 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x3__sse() 136 const __m128 vk1x0123 = _mm_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x3__sse() local 137 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x3__sse()
|
D | up8x3-minmax-neon.c | 69 const float32x4_t vk1x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x3__neon() local 71 vacc0123p0 = vmlaq_f32(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x3__neon() 99 const float32x4_t vk1x0123 = vld1q_f32(w + 12); in xnn_f32_dwconv_minmax_ukernel_up8x3__neon() local 100 vacc0123p0 = vmlaq_f32(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x3__neon() 121 const float32x4_t vk1x0123 = vld1q_f32(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x3__neon() local 122 vacc0123p0 = vmlaq_f32(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x3__neon()
|
D | up8x3-minmax-sse-acc2.c | 72 const __m128 vk1x0123 = _mm_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x3__sse_acc2() local 74 __m128 vacc0123p1 = _mm_mul_ps(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x3__sse_acc2() 113 const __m128 vk1x0123 = _mm_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x3__sse_acc2() local 114 __m128 vacc0123p1 = _mm_mul_ps(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x3__sse_acc2() 141 const __m128 vk1x0123 = _mm_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x3__sse_acc2() local 142 __m128 vacc0123p1 = _mm_mul_ps(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x3__sse_acc2()
|
D | up8x3-minmax-neonfma.c | 69 const float32x4_t vk1x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x3__neonfma() local 71 vacc0123p0 = vfmaq_f32(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x3__neonfma() 99 const float32x4_t vk1x0123 = vld1q_f32(w + 12); in xnn_f32_dwconv_minmax_ukernel_up8x3__neonfma() local 100 vacc0123p0 = vfmaq_f32(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x3__neonfma() 121 const float32x4_t vk1x0123 = vld1q_f32(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x3__neonfma() local 122 vacc0123p0 = vfmaq_f32(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x3__neonfma()
|
D | up8x3-minmax-wasmsimd-x86.c | 72 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmsimd_x86() local 74 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmsimd_x86() 111 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmsimd_x86() local 112 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmsimd_x86() 137 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmsimd_x86() local 138 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmsimd_x86()
|
D | up8x3-minmax-neonfma-acc2.c | 69 const float32x4_t vk1x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x3__neonfma_acc2() local 71 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x3__neonfma_acc2() 102 const float32x4_t vk1x0123 = vld1q_f32(w + 12); in xnn_f32_dwconv_minmax_ukernel_up8x3__neonfma_acc2() local 103 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x3__neonfma_acc2() 126 const float32x4_t vk1x0123 = vld1q_f32(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x3__neonfma_acc2() local 127 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x3__neonfma_acc2()
|
D | up8x3-minmax-wasmsimd-arm-acc2.c | 72 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmsimd_arm_acc2() local 74 v128_t vacc0123p1 = wasm_f32x4_mul(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmsimd_arm_acc2() 114 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmsimd_arm_acc2() local 115 v128_t vacc0123p1 = wasm_f32x4_mul(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmsimd_arm_acc2() 142 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmsimd_arm_acc2() local 143 v128_t vacc0123p1 = wasm_f32x4_mul(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmsimd_arm_acc2()
|
D | up8x3-minmax-wasmrelaxedsimd.c | 72 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmrelaxedsimd() local 74 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmrelaxedsimd() 111 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmrelaxedsimd() local 112 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmrelaxedsimd() 137 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmrelaxedsimd() local 138 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmrelaxedsimd()
|
D | up8x3-minmax-neon-acc2.c | 69 const float32x4_t vk1x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x3__neon_acc2() local 71 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x3__neon_acc2() 102 const float32x4_t vk1x0123 = vld1q_f32(w + 12); in xnn_f32_dwconv_minmax_ukernel_up8x3__neon_acc2() local 103 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x3__neon_acc2() 126 const float32x4_t vk1x0123 = vld1q_f32(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x3__neon_acc2() local 127 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x3__neon_acc2()
|
D | up8x3-minmax-wasmrelaxedsimd-acc2.c | 72 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmrelaxedsimd_acc2() local 74 v128_t vacc0123p1 = wasm_f32x4_mul(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmrelaxedsimd_acc2() 114 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmrelaxedsimd_acc2() local 115 v128_t vacc0123p1 = wasm_f32x4_mul(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmrelaxedsimd_acc2() 142 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmrelaxedsimd_acc2() local 143 v128_t vacc0123p1 = wasm_f32x4_mul(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmrelaxedsimd_acc2()
|
D | up8x4-wasmrelaxedsimd-fma.c | 75 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_ukernel_up8x4__wasmrelaxedsimd_fma() local 77 vacc0123p0 = __builtin_wasm_fma_f32x4(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up8x4__wasmrelaxedsimd_fma() 120 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_ukernel_up8x4__wasmrelaxedsimd_fma() local 121 vacc0123p0 = __builtin_wasm_fma_f32x4(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up8x4__wasmrelaxedsimd_fma() 151 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_ukernel_up8x4__wasmrelaxedsimd_fma() local 152 vacc0123p0 = __builtin_wasm_fma_f32x4(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up8x4__wasmrelaxedsimd_fma()
|
D | up8x3-minmax-wasmsimd-arm.c | 72 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmsimd_arm() local 74 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmsimd_arm() 111 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmsimd_arm() local 112 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmsimd_arm() 137 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmsimd_arm() local 138 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x3__wasmsimd_arm()
|
D | up8x4-minmax-wasmrelaxedsimd-fma.c | 77 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmrelaxedsimd_fma() local 79 vacc0123p0 = __builtin_wasm_fma_f32x4(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmrelaxedsimd_fma() 125 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmrelaxedsimd_fma() local 126 vacc0123p0 = __builtin_wasm_fma_f32x4(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmrelaxedsimd_fma() 157 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmrelaxedsimd_fma() local 158 vacc0123p0 = __builtin_wasm_fma_f32x4(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmrelaxedsimd_fma()
|
D | up8x4-wasmsimd.c | 75 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_ukernel_up8x4__wasmsimd() local 77 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_ukernel_up8x4__wasmsimd() 120 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_ukernel_up8x4__wasmsimd() local 121 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_ukernel_up8x4__wasmsimd() 151 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_ukernel_up8x4__wasmsimd() local 152 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_ukernel_up8x4__wasmsimd()
|
D | up4x3-wasmsimd.c | 65 const v128_t vk1x0123 = wasm_v128_load(w + 8); in xnn_f32_dwconv_ukernel_up4x3__wasmsimd() local 66 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_ukernel_up4x3__wasmsimd() 90 const v128_t vk1x0123 = wasm_v128_load(w + 8); in xnn_f32_dwconv_ukernel_up4x3__wasmsimd() local 91 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_ukernel_up4x3__wasmsimd()
|
D | up4x3-wasmrelaxedsimd-fma.c | 65 const v128_t vk1x0123 = wasm_v128_load(w + 8); in xnn_f32_dwconv_ukernel_up4x3__wasmrelaxedsimd_fma() local 66 vacc0123p0 = __builtin_wasm_fma_f32x4(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up4x3__wasmrelaxedsimd_fma() 90 const v128_t vk1x0123 = wasm_v128_load(w + 8); in xnn_f32_dwconv_ukernel_up4x3__wasmrelaxedsimd_fma() local 91 vacc0123p0 = __builtin_wasm_fma_f32x4(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_ukernel_up4x3__wasmrelaxedsimd_fma()
|
D | up8x4-minmax-wasmrelaxedsimd-acc2.c | 77 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmrelaxedsimd_acc2() local 79 v128_t vacc0123p1 = wasm_f32x4_mul(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmrelaxedsimd_acc2() 128 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmrelaxedsimd_acc2() local 129 v128_t vacc0123p1 = wasm_f32x4_mul(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmrelaxedsimd_acc2() 162 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmrelaxedsimd_acc2() local 163 v128_t vacc0123p1 = wasm_f32x4_mul(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmrelaxedsimd_acc2()
|
D | up8x4-minmax-wasmsimd-x86-acc2.c | 77 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86_acc2() local 79 v128_t vacc0123p1 = wasm_f32x4_mul(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86_acc2() 128 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86_acc2() local 129 v128_t vacc0123p1 = wasm_f32x4_mul(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86_acc2() 162 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86_acc2() local 163 v128_t vacc0123p1 = wasm_f32x4_mul(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86_acc2()
|
D | up8x4-minmax-wasmsimd-x86.c | 77 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86() local 79 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86() 125 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86() local 126 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86() 157 const v128_t vk1x0123 = wasm_v128_load(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86() local 158 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86()
|
D | up8x4-minmax-neonfma-acc2.c | 74 const float32x4_t vk1x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x4__neonfma_acc2() local 76 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__neonfma_acc2() 114 const float32x4_t vk1x0123 = vld1q_f32(w + 12); in xnn_f32_dwconv_minmax_ukernel_up8x4__neonfma_acc2() local 115 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__neonfma_acc2() 142 const float32x4_t vk1x0123 = vld1q_f32(w + 16); in xnn_f32_dwconv_minmax_ukernel_up8x4__neonfma_acc2() local 143 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__neonfma_acc2()
|