/external/XNNPACK/src/f32-dwconv/gen/ |
D | up4x25-minmax-wasmsimd-x86-acc2.c | 178 v128_t vacc0123p1 = wasm_f32x4_mul(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86_acc2() local 190 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86_acc2() 202 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86_acc2() 214 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86_acc2() 226 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi9x0123, vk9x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86_acc2() 238 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi11x0123, vk11x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86_acc2() 250 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi13x0123, vk13x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86_acc2() 262 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi15x0123, vk15x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86_acc2() 274 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi17x0123, vk17x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86_acc2() 286 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi19x0123, vk19x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86_acc2() [all …]
|
D | up4x25-minmax-sse-acc2.c | 178 __m128 vacc0123p1 = _mm_mul_ps(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse_acc2() local 190 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse_acc2() 202 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse_acc2() 214 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse_acc2() 226 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi9x0123, vk9x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse_acc2() 238 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi11x0123, vk11x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse_acc2() 250 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi13x0123, vk13x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse_acc2() 262 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi15x0123, vk15x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse_acc2() 274 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi17x0123, vk17x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse_acc2() 286 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi19x0123, vk19x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse_acc2() [all …]
|
D | up4x25-minmax-neonfma-acc2.c | 175 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma_acc2() local 183 vacc0123p1 = vfmaq_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma_acc2() 191 vacc0123p1 = vfmaq_f32(vacc0123p1, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma_acc2() 199 vacc0123p1 = vfmaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma_acc2() 207 vacc0123p1 = vfmaq_f32(vacc0123p1, vi9x0123, vk9x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma_acc2() 215 vacc0123p1 = vfmaq_f32(vacc0123p1, vi11x0123, vk11x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma_acc2() 223 vacc0123p1 = vfmaq_f32(vacc0123p1, vi13x0123, vk13x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma_acc2() 231 vacc0123p1 = vfmaq_f32(vacc0123p1, vi15x0123, vk15x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma_acc2() 239 vacc0123p1 = vfmaq_f32(vacc0123p1, vi17x0123, vk17x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma_acc2() 247 vacc0123p1 = vfmaq_f32(vacc0123p1, vi19x0123, vk19x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma_acc2() [all …]
|
D | up4x25-minmax-wasmsimd-arm-acc2.c | 178 v128_t vacc0123p1 = wasm_f32x4_mul(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm_acc2() local 190 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm_acc2() 202 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm_acc2() 214 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm_acc2() 226 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi9x0123, vk9x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm_acc2() 238 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi11x0123, vk11x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm_acc2() 250 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi13x0123, vk13x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm_acc2() 262 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi15x0123, vk15x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm_acc2() 274 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi17x0123, vk17x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm_acc2() 286 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi19x0123, vk19x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm_acc2() [all …]
|
D | up4x25-minmax-neon-acc2.c | 175 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon_acc2() local 183 vacc0123p1 = vmlaq_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon_acc2() 191 vacc0123p1 = vmlaq_f32(vacc0123p1, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon_acc2() 199 vacc0123p1 = vmlaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon_acc2() 207 vacc0123p1 = vmlaq_f32(vacc0123p1, vi9x0123, vk9x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon_acc2() 215 vacc0123p1 = vmlaq_f32(vacc0123p1, vi11x0123, vk11x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon_acc2() 223 vacc0123p1 = vmlaq_f32(vacc0123p1, vi13x0123, vk13x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon_acc2() 231 vacc0123p1 = vmlaq_f32(vacc0123p1, vi15x0123, vk15x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon_acc2() 239 vacc0123p1 = vmlaq_f32(vacc0123p1, vi17x0123, vk17x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon_acc2() 247 vacc0123p1 = vmlaq_f32(vacc0123p1, vi19x0123, vk19x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon_acc2() [all …]
|
D | up8x25-minmax-neonfma-acc2.c | 181 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma_acc2() local 195 vacc0123p1 = vfmaq_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma_acc2() 209 vacc0123p1 = vfmaq_f32(vacc0123p1, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma_acc2() 223 vacc0123p1 = vfmaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma_acc2() 237 vacc0123p1 = vfmaq_f32(vacc0123p1, vi9x0123, vk9x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma_acc2() 251 vacc0123p1 = vfmaq_f32(vacc0123p1, vi11x0123, vk11x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma_acc2() 265 vacc0123p1 = vfmaq_f32(vacc0123p1, vi13x0123, vk13x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma_acc2() 279 vacc0123p1 = vfmaq_f32(vacc0123p1, vi15x0123, vk15x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma_acc2() 293 vacc0123p1 = vfmaq_f32(vacc0123p1, vi17x0123, vk17x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma_acc2() 307 vacc0123p1 = vfmaq_f32(vacc0123p1, vi19x0123, vk19x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma_acc2() [all …]
|
D | up8x25-minmax-neon-acc2.c | 181 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon_acc2() local 195 vacc0123p1 = vmlaq_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon_acc2() 209 vacc0123p1 = vmlaq_f32(vacc0123p1, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon_acc2() 223 vacc0123p1 = vmlaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon_acc2() 237 vacc0123p1 = vmlaq_f32(vacc0123p1, vi9x0123, vk9x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon_acc2() 251 vacc0123p1 = vmlaq_f32(vacc0123p1, vi11x0123, vk11x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon_acc2() 265 vacc0123p1 = vmlaq_f32(vacc0123p1, vi13x0123, vk13x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon_acc2() 279 vacc0123p1 = vmlaq_f32(vacc0123p1, vi15x0123, vk15x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon_acc2() 293 vacc0123p1 = vmlaq_f32(vacc0123p1, vi17x0123, vk17x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon_acc2() 307 vacc0123p1 = vmlaq_f32(vacc0123p1, vi19x0123, vk19x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon_acc2() [all …]
|
D | up8x25-minmax-sse-acc2.c | 184 __m128 vacc0123p1 = _mm_mul_ps(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse_acc2() local 202 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse_acc2() 220 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse_acc2() 238 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse_acc2() 256 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi9x0123, vk9x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse_acc2() 274 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi11x0123, vk11x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse_acc2() 292 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi13x0123, vk13x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse_acc2() 310 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi15x0123, vk15x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse_acc2() 328 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi17x0123, vk17x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse_acc2() 346 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi19x0123, vk19x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse_acc2() [all …]
|
D | up8x25-minmax-wasmsimd-x86-acc2.c | 184 v128_t vacc0123p1 = wasm_f32x4_mul(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86_acc2() local 202 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86_acc2() 220 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86_acc2() 238 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86_acc2() 256 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi9x0123, vk9x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86_acc2() 274 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi11x0123, vk11x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86_acc2() 292 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi13x0123, vk13x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86_acc2() 310 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi15x0123, vk15x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86_acc2() 328 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi17x0123, vk17x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86_acc2() 346 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi19x0123, vk19x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86_acc2() [all …]
|
D | up8x25-minmax-wasmsimd-arm-acc2.c | 184 v128_t vacc0123p1 = wasm_f32x4_mul(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm_acc2() local 202 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm_acc2() 220 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm_acc2() 238 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm_acc2() 256 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi9x0123, vk9x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm_acc2() 274 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi11x0123, vk11x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm_acc2() 292 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi13x0123, vk13x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm_acc2() 310 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi15x0123, vk15x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm_acc2() 328 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi17x0123, vk17x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm_acc2() 346 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi19x0123, vk19x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm_acc2() [all …]
|
D | up8x9-minmax-neon-acc2.c | 101 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() local 115 vacc0123p1 = vmlaq_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() 129 vacc0123p1 = vmlaq_f32(vacc0123p1, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() 143 vacc0123p1 = vmlaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() 154 vacc0123p0 = vaddq_f32(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() 175 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() local 183 vacc0123p1 = vmlaq_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() 191 vacc0123p1 = vmlaq_f32(vacc0123p1, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() 199 vacc0123p1 = vmlaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() 206 vacc0123p0 = vaddq_f32(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2() [all …]
|
D | up8x9-minmax-sse-acc2.c | 104 __m128 vacc0123p1 = _mm_mul_ps(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2() local 122 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2() 140 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2() 158 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2() 173 vacc0123p0 = _mm_add_ps(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2() 198 __m128 vacc0123p1 = _mm_mul_ps(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2() local 210 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2() 222 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2() 234 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2() 245 vacc0123p0 = _mm_add_ps(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2() [all …]
|
D | up8x9-minmax-wasmsimd-arm-acc2.c | 104 v128_t vacc0123p1 = wasm_f32x4_mul(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2() local 122 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2() 140 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2() 158 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2() 173 vacc0123p0 = wasm_f32x4_add(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2() 199 v128_t vacc0123p1 = wasm_f32x4_mul(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2() local 211 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2() 223 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2() 235 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2() 246 vacc0123p0 = wasm_f32x4_add(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2() [all …]
|
D | up8x9-minmax-neonfma-acc2.c | 101 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() local 115 vacc0123p1 = vfmaq_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() 129 vacc0123p1 = vfmaq_f32(vacc0123p1, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() 143 vacc0123p1 = vfmaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() 154 vacc0123p0 = vaddq_f32(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() 175 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() local 183 vacc0123p1 = vfmaq_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() 191 vacc0123p1 = vfmaq_f32(vacc0123p1, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() 199 vacc0123p1 = vfmaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() 206 vacc0123p0 = vaddq_f32(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2() [all …]
|
D | up8x9-minmax-wasmsimd-x86-acc2.c | 104 v128_t vacc0123p1 = wasm_f32x4_mul(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2() local 122 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2() 140 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2() 158 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2() 173 vacc0123p0 = wasm_f32x4_add(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2() 199 v128_t vacc0123p1 = wasm_f32x4_mul(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2() local 211 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2() 223 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2() 235 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2() 246 vacc0123p0 = wasm_f32x4_add(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2() [all …]
|
D | up4x9-minmax-sse-acc2.c | 98 __m128 vacc0123p1 = _mm_mul_ps(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse_acc2() local 110 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse_acc2() 122 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse_acc2() 134 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse_acc2() 145 vacc0123p0 = _mm_add_ps(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse_acc2() 162 __m128 vacc0123p1 = _mm_mul_ps(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse_acc2() local 170 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse_acc2() 178 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse_acc2() 186 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse_acc2() 193 vacc0123p0 = _mm_add_ps(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse_acc2()
|
D | up4x9-minmax-neon-acc2.c | 95 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon_acc2() local 103 vacc0123p1 = vmlaq_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon_acc2() 111 vacc0123p1 = vmlaq_f32(vacc0123p1, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon_acc2() 119 vacc0123p1 = vmlaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon_acc2() 126 vacc0123p0 = vaddq_f32(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon_acc2() 143 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon_acc2() local 151 vacc0123p1 = vmlaq_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon_acc2() 159 vacc0123p1 = vmlaq_f32(vacc0123p1, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon_acc2() 167 vacc0123p1 = vmlaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon_acc2() 174 vacc0123p0 = vaddq_f32(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon_acc2()
|
D | up4x9-minmax-wasmsimd-arm-acc2.c | 98 v128_t vacc0123p1 = wasm_f32x4_mul(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm_acc2() local 110 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm_acc2() 122 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm_acc2() 134 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm_acc2() 145 vacc0123p0 = wasm_f32x4_add(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm_acc2() 163 v128_t vacc0123p1 = wasm_f32x4_mul(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm_acc2() local 171 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm_acc2() 179 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm_acc2() 187 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm_acc2() 194 vacc0123p0 = wasm_f32x4_add(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm_acc2()
|
D | up4x9-minmax-wasmsimd-x86-acc2.c | 98 v128_t vacc0123p1 = wasm_f32x4_mul(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86_acc2() local 110 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86_acc2() 122 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86_acc2() 134 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86_acc2() 145 vacc0123p0 = wasm_f32x4_add(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86_acc2() 163 v128_t vacc0123p1 = wasm_f32x4_mul(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86_acc2() local 171 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86_acc2() 179 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86_acc2() 187 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86_acc2() 194 vacc0123p0 = wasm_f32x4_add(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86_acc2()
|
D | up4x9-minmax-neonfma-acc2.c | 95 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma_acc2() local 103 vacc0123p1 = vfmaq_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma_acc2() 111 vacc0123p1 = vfmaq_f32(vacc0123p1, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma_acc2() 119 vacc0123p1 = vfmaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma_acc2() 126 vacc0123p0 = vaddq_f32(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma_acc2() 143 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma_acc2() local 151 vacc0123p1 = vfmaq_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma_acc2() 159 vacc0123p1 = vfmaq_f32(vacc0123p1, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma_acc2() 167 vacc0123p1 = vfmaq_f32(vacc0123p1, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma_acc2() 174 vacc0123p0 = vaddq_f32(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma_acc2()
|
D | up8x4-minmax-sse-acc2.c | 79 __m128 vacc0123p1 = _mm_mul_ps(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__sse_acc2() local 97 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__sse_acc2() 103 vacc0123p0 = _mm_add_ps(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_minmax_ukernel_up8x4__sse_acc2() 128 __m128 vacc0123p1 = _mm_mul_ps(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__sse_acc2() local 140 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__sse_acc2() 145 vacc0123p0 = _mm_add_ps(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_minmax_ukernel_up8x4__sse_acc2() 162 __m128 vacc0123p1 = _mm_mul_ps(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__sse_acc2() local 170 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__sse_acc2() 173 vacc0123p0 = _mm_add_ps(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_minmax_ukernel_up8x4__sse_acc2()
|
D | up8x4-minmax-neonfma-acc2.c | 76 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__neonfma_acc2() local 90 vacc0123p1 = vfmaq_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__neonfma_acc2() 94 vacc0123p0 = vaddq_f32(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_minmax_ukernel_up8x4__neonfma_acc2() 115 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__neonfma_acc2() local 123 vacc0123p1 = vfmaq_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__neonfma_acc2() 126 vacc0123p0 = vaddq_f32(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_minmax_ukernel_up8x4__neonfma_acc2() 143 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__neonfma_acc2() local 151 vacc0123p1 = vfmaq_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__neonfma_acc2() 154 vacc0123p0 = vaddq_f32(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_minmax_ukernel_up8x4__neonfma_acc2()
|
D | up8x4-minmax-wasmsimd-arm-acc2.c | 79 v128_t vacc0123p1 = wasm_f32x4_mul(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_arm_acc2() local 97 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_arm_acc2() 103 vacc0123p0 = wasm_f32x4_add(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_arm_acc2() 129 v128_t vacc0123p1 = wasm_f32x4_mul(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_arm_acc2() local 141 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_arm_acc2() 146 vacc0123p0 = wasm_f32x4_add(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_arm_acc2() 163 v128_t vacc0123p1 = wasm_f32x4_mul(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_arm_acc2() local 171 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_arm_acc2() 174 vacc0123p0 = wasm_f32x4_add(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_arm_acc2()
|
D | up8x4-minmax-neon-acc2.c | 76 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__neon_acc2() local 90 vacc0123p1 = vmlaq_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__neon_acc2() 94 vacc0123p0 = vaddq_f32(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_minmax_ukernel_up8x4__neon_acc2() 115 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__neon_acc2() local 123 vacc0123p1 = vmlaq_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__neon_acc2() 126 vacc0123p0 = vaddq_f32(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_minmax_ukernel_up8x4__neon_acc2() 143 float32x4_t vacc0123p1 = vmulq_f32(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__neon_acc2() local 151 vacc0123p1 = vmlaq_f32(vacc0123p1, vi3x0123, vk3x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__neon_acc2() 154 vacc0123p0 = vaddq_f32(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_minmax_ukernel_up8x4__neon_acc2()
|
D | up8x4-minmax-wasmsimd-x86-acc2.c | 79 v128_t vacc0123p1 = wasm_f32x4_mul(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86_acc2() local 97 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86_acc2() 103 vacc0123p0 = wasm_f32x4_add(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86_acc2() 129 v128_t vacc0123p1 = wasm_f32x4_mul(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86_acc2() local 141 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86_acc2() 146 vacc0123p0 = wasm_f32x4_add(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86_acc2() 163 v128_t vacc0123p1 = wasm_f32x4_mul(vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86_acc2() local 171 vacc0123p1 = wasm_f32x4_add(vacc0123p1, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86_acc2() 174 vacc0123p0 = wasm_f32x4_add(vacc0123p0, vacc0123p1); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86_acc2()
|