/external/XNNPACK/src/f32-dwconv/gen/ |
D | up8x9-wasmrelaxedsimd-fma.c | 154 const v128_t vk7x0123 = wasm_v128_load(w + 64); in xnn_f32_dwconv_ukernel_up8x9__wasmrelaxedsimd_fma() local 156 vacc0123p0 = __builtin_wasm_fma_f32x4(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_ukernel_up8x9__wasmrelaxedsimd_fma() 226 const v128_t vk7x0123 = wasm_v128_load(w + 64); in xnn_f32_dwconv_ukernel_up8x9__wasmrelaxedsimd_fma() local 227 vacc0123p0 = __builtin_wasm_fma_f32x4(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_ukernel_up8x9__wasmrelaxedsimd_fma() 275 const v128_t vk7x0123 = wasm_v128_load(w + 64); in xnn_f32_dwconv_ukernel_up8x9__wasmrelaxedsimd_fma() local 276 vacc0123p0 = __builtin_wasm_fma_f32x4(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_ukernel_up8x9__wasmrelaxedsimd_fma()
|
D | up8x9-minmax-wasmrelaxedsimd-fma.c | 156 const v128_t vk7x0123 = wasm_v128_load(w + 64); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmrelaxedsimd_fma() local 158 vacc0123p0 = __builtin_wasm_fma_f32x4(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmrelaxedsimd_fma() 231 const v128_t vk7x0123 = wasm_v128_load(w + 64); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmrelaxedsimd_fma() local 232 vacc0123p0 = __builtin_wasm_fma_f32x4(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmrelaxedsimd_fma() 281 const v128_t vk7x0123 = wasm_v128_load(w + 64); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmrelaxedsimd_fma() local 282 vacc0123p0 = __builtin_wasm_fma_f32x4(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmrelaxedsimd_fma()
|
D | up8x9-minmax-sse.c | 156 const __m128 vk7x0123 = _mm_load_ps(w + 64); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse() local 158 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse() 230 const __m128 vk7x0123 = _mm_load_ps(w + 64); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse() local 231 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse() 280 const __m128 vk7x0123 = _mm_load_ps(w + 64); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse() local 281 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse()
|
D | up8x9-minmax-wasmsimd-arm.c | 156 const v128_t vk7x0123 = wasm_v128_load(w + 64); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm() local 158 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm() 231 const v128_t vk7x0123 = wasm_v128_load(w + 64); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm() local 232 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm() 281 const v128_t vk7x0123 = wasm_v128_load(w + 64); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm() local 282 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm()
|
D | up8x9-minmax-wasmrelaxedsimd-acc2.c | 156 const v128_t vk7x0123 = wasm_v128_load(w + 64); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmrelaxedsimd_acc2() local 158 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmrelaxedsimd_acc2() 234 const v128_t vk7x0123 = wasm_v128_load(w + 64); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmrelaxedsimd_acc2() local 235 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmrelaxedsimd_acc2() 286 const v128_t vk7x0123 = wasm_v128_load(w + 64); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmrelaxedsimd_acc2() local 287 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmrelaxedsimd_acc2()
|
D | up8x9-wasmsimd.c | 154 const v128_t vk7x0123 = wasm_v128_load(w + 64); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd() local 156 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd() 226 const v128_t vk7x0123 = wasm_v128_load(w + 64); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd() local 227 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd() 275 const v128_t vk7x0123 = wasm_v128_load(w + 64); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd() local 276 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd()
|
D | up8x9-minmax-neonfma.c | 141 const float32x4_t vk7x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() local 143 vacc0123p0 = vfmaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() 195 const float32x4_t vk7x0123 = vld1q_f32(w + 60); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() local 196 vacc0123p0 = vfmaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() 241 const float32x4_t vk7x0123 = vld1q_f32(w + 64); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() local 242 vacc0123p0 = vfmaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma()
|
D | up8x9-minmax-neon-acc2.c | 141 const float32x4_t vk7x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() local 143 vacc0123p1 = vmlaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() 198 const float32x4_t vk7x0123 = vld1q_f32(w + 60); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() local 199 vacc0123p1 = vmlaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() 246 const float32x4_t vk7x0123 = vld1q_f32(w + 64); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() local 247 vacc0123p1 = vmlaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2()
|
D | up8x9-minmax-neon.c | 141 const float32x4_t vk7x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() local 143 vacc0123p0 = vmlaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() 195 const float32x4_t vk7x0123 = vld1q_f32(w + 60); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() local 196 vacc0123p0 = vmlaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() 241 const float32x4_t vk7x0123 = vld1q_f32(w + 64); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() local 242 vacc0123p0 = vmlaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon()
|
D | up8x9-minmax-wasmsimd-x86-acc2.c | 156 const v128_t vk7x0123 = wasm_v128_load(w + 64); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2() local 158 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2() 234 const v128_t vk7x0123 = wasm_v128_load(w + 64); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2() local 235 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2() 286 const v128_t vk7x0123 = wasm_v128_load(w + 64); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2() local 287 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2()
|
D | up8x9-minmax-wasmsimd-arm-acc2.c | 156 const v128_t vk7x0123 = wasm_v128_load(w + 64); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2() local 158 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2() 234 const v128_t vk7x0123 = wasm_v128_load(w + 64); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2() local 235 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2() 286 const v128_t vk7x0123 = wasm_v128_load(w + 64); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2() local 287 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2()
|
D | up8x9-minmax-wasmsimd-x86.c | 156 const v128_t vk7x0123 = wasm_v128_load(w + 64); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86() local 158 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86() 231 const v128_t vk7x0123 = wasm_v128_load(w + 64); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86() local 232 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86() 281 const v128_t vk7x0123 = wasm_v128_load(w + 64); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86() local 282 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86()
|
D | up8x9-minmax-wasmrelaxedsimd-fma-acc2.c | 156 const v128_t vk7x0123 = wasm_v128_load(w + 64); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmrelaxedsimd_fma_acc2() local 158 vacc0123p1 = __builtin_wasm_fma_f32x4(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmrelaxedsimd_fma_acc2() 234 const v128_t vk7x0123 = wasm_v128_load(w + 64); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmrelaxedsimd_fma_acc2() local 235 vacc0123p1 = __builtin_wasm_fma_f32x4(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmrelaxedsimd_fma_acc2() 286 const v128_t vk7x0123 = wasm_v128_load(w + 64); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmrelaxedsimd_fma_acc2() local 287 vacc0123p1 = __builtin_wasm_fma_f32x4(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmrelaxedsimd_fma_acc2()
|
D | up8x9-minmax-wasmrelaxedsimd.c | 156 const v128_t vk7x0123 = wasm_v128_load(w + 64); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmrelaxedsimd() local 158 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmrelaxedsimd() 231 const v128_t vk7x0123 = wasm_v128_load(w + 64); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmrelaxedsimd() local 232 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmrelaxedsimd() 281 const v128_t vk7x0123 = wasm_v128_load(w + 64); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmrelaxedsimd() local 282 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmrelaxedsimd()
|
D | up8x9-minmax-neonfma-acc2.c | 141 const float32x4_t vk7x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() local 143 vacc0123p1 = vfmaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() 198 const float32x4_t vk7x0123 = vld1q_f32(w + 60); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() local 199 vacc0123p1 = vfmaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() 246 const float32x4_t vk7x0123 = vld1q_f32(w + 64); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() local 247 vacc0123p1 = vfmaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2()
|
D | up8x9-minmax-sse-acc2.c | 156 const __m128 vk7x0123 = _mm_load_ps(w + 64); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2() local 158 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2() 233 const __m128 vk7x0123 = _mm_load_ps(w + 64); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2() local 234 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2() 285 const __m128 vk7x0123 = _mm_load_ps(w + 64); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2() local 286 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2()
|
D | up8x9-wasmsimd-acc2.c | 154 const v128_t vk7x0123 = wasm_v128_load(w + 64); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd_acc2() local 156 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd_acc2() 229 const v128_t vk7x0123 = wasm_v128_load(w + 64); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd_acc2() local 230 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd_acc2() 280 const v128_t vk7x0123 = wasm_v128_load(w + 64); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd_acc2() local 281 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd_acc2()
|
D | up4x9-wasmrelaxedsimd-fma.c | 131 const v128_t vk7x0123 = wasm_v128_load(w + 32); in xnn_f32_dwconv_ukernel_up4x9__wasmrelaxedsimd_fma() local 132 vacc0123p0 = __builtin_wasm_fma_f32x4(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_ukernel_up4x9__wasmrelaxedsimd_fma() 180 const v128_t vk7x0123 = wasm_v128_load(w + 32); in xnn_f32_dwconv_ukernel_up4x9__wasmrelaxedsimd_fma() local 181 vacc0123p0 = __builtin_wasm_fma_f32x4(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_ukernel_up4x9__wasmrelaxedsimd_fma()
|
D | up4x9-minmax-neon.c | 118 const float32x4_t vk7x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x9__neon() local 119 vacc0123p0 = vmlaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon() 164 const float32x4_t vk7x0123 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x9__neon() local 165 vacc0123p0 = vmlaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon()
|
D | up4x9-minmax-wasmsimd-arm.c | 133 const v128_t vk7x0123 = wasm_v128_load(w + 32); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm() local 134 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm() 184 const v128_t vk7x0123 = wasm_v128_load(w + 32); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm() local 185 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm()
|
D | up4x9-minmax-wasmrelaxedsimd-fma.c | 133 const v128_t vk7x0123 = wasm_v128_load(w + 32); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmrelaxedsimd_fma() local 134 vacc0123p0 = __builtin_wasm_fma_f32x4(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmrelaxedsimd_fma() 184 const v128_t vk7x0123 = wasm_v128_load(w + 32); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmrelaxedsimd_fma() local 185 vacc0123p0 = __builtin_wasm_fma_f32x4(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmrelaxedsimd_fma()
|
D | up4x9-minmax-wasmrelaxedsimd.c | 133 const v128_t vk7x0123 = wasm_v128_load(w + 32); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmrelaxedsimd() local 134 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmrelaxedsimd() 184 const v128_t vk7x0123 = wasm_v128_load(w + 32); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmrelaxedsimd() local 185 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmrelaxedsimd()
|
D | up4x9-minmax-wasmrelaxedsimd-fma-acc2.c | 133 const v128_t vk7x0123 = wasm_v128_load(w + 32); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmrelaxedsimd_fma_acc2() local 134 vacc0123p1 = __builtin_wasm_fma_f32x4(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmrelaxedsimd_fma_acc2() 186 const v128_t vk7x0123 = wasm_v128_load(w + 32); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmrelaxedsimd_fma_acc2() local 187 vacc0123p1 = __builtin_wasm_fma_f32x4(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmrelaxedsimd_fma_acc2()
|
D | up4x9-wasmsimd-acc2.c | 131 const v128_t vk7x0123 = wasm_v128_load(w + 32); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd_acc2() local 132 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd_acc2() 182 const v128_t vk7x0123 = wasm_v128_load(w + 32); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd_acc2() local 183 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd_acc2()
|
D | up4x9-wasmsimd.c | 131 const v128_t vk7x0123 = wasm_v128_load(w + 32); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd() local 132 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd() 180 const v128_t vk7x0123 = wasm_v128_load(w + 32); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd() local 181 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd()
|