/external/XNNPACK/src/f32-dwconv/gen/ |
D | up4x25-minmax-wasmsimd-x86.c | 264 const v128_t vi16x0123 = wasm_v128_load(i16); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86() local 268 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi16x0123, vk16x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86() 395 const v128_t vi16x0123 = wasm_v128_load(i16); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86() local 397 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi16x0123, vk16x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86()
|
D | up4x25-minmax-wasmsimd-x86-acc2.c | 264 const v128_t vi16x0123 = wasm_v128_load(i16); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86_acc2() local 268 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi16x0123, vk16x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86_acc2() 397 const v128_t vi16x0123 = wasm_v128_load(i16); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86_acc2() local 399 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi16x0123, vk16x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86_acc2()
|
D | up4x25-minmax-sse.c | 264 const __m128 vi16x0123 = _mm_loadu_ps(i16); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse() local 268 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi16x0123, vk16x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse() 394 const __m128 vi16x0123 = _mm_loadu_ps(i16); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse() local 396 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi16x0123, vk16x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
|
D | up4x25-minmax-sse-acc2.c | 264 const __m128 vi16x0123 = _mm_loadu_ps(i16); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse_acc2() local 268 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi16x0123, vk16x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse_acc2() 396 const __m128 vi16x0123 = _mm_loadu_ps(i16); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse_acc2() local 398 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi16x0123, vk16x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse_acc2()
|
D | up4x25-wasmsimd.c | 262 const v128_t vi16x0123 = wasm_v128_load(i16); in xnn_f32_dwconv_ukernel_up4x25__wasmsimd() local 266 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi16x0123, vk16x0123)); in xnn_f32_dwconv_ukernel_up4x25__wasmsimd() 391 const v128_t vi16x0123 = wasm_v128_load(i16); in xnn_f32_dwconv_ukernel_up4x25__wasmsimd() local 393 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi16x0123, vk16x0123)); in xnn_f32_dwconv_ukernel_up4x25__wasmsimd()
|
D | up4x25-minmax-neonfma.c | 233 const float32x4_t vi16x0123 = vld1q_f32(i16); i16 += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma() local 235 vacc0123p0 = vfmaq_f32(vacc0123p0, vi16x0123, vk16x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma() 343 const float32x4_t vi16x0123 = vld1q_f32(i16); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma() local 345 vacc0123p0 = vfmaq_f32(vacc0123p0, vi16x0123, vk16x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma()
|
D | up4x25-minmax-wasmsimd-arm.c | 264 const v128_t vi16x0123 = wasm_v128_load(i16); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm() local 268 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi16x0123, vk16x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm() 395 const v128_t vi16x0123 = wasm_v128_load(i16); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm() local 397 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi16x0123, vk16x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm()
|
D | up4x25-minmax-neonfma-acc2.c | 233 const float32x4_t vi16x0123 = vld1q_f32(i16); i16 += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma_acc2() local 235 vacc0123p0 = vfmaq_f32(vacc0123p0, vi16x0123, vk16x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma_acc2() 345 const float32x4_t vi16x0123 = vld1q_f32(i16); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma_acc2() local 347 vacc0123p0 = vfmaq_f32(vacc0123p0, vi16x0123, vk16x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma_acc2()
|
D | up4x25-minmax-wasmsimd-arm-acc2.c | 264 const v128_t vi16x0123 = wasm_v128_load(i16); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm_acc2() local 268 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi16x0123, vk16x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm_acc2() 397 const v128_t vi16x0123 = wasm_v128_load(i16); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm_acc2() local 399 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi16x0123, vk16x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm_acc2()
|
D | up4x25-minmax-neon-acc2.c | 233 const float32x4_t vi16x0123 = vld1q_f32(i16); i16 += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neon_acc2() local 235 vacc0123p0 = vmlaq_f32(vacc0123p0, vi16x0123, vk16x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon_acc2() 345 const float32x4_t vi16x0123 = vld1q_f32(i16); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon_acc2() local 347 vacc0123p0 = vmlaq_f32(vacc0123p0, vi16x0123, vk16x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon_acc2()
|
D | up4x25-minmax-neon.c | 233 const float32x4_t vi16x0123 = vld1q_f32(i16); i16 += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neon() local 235 vacc0123p0 = vmlaq_f32(vacc0123p0, vi16x0123, vk16x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon() 343 const float32x4_t vi16x0123 = vld1q_f32(i16); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon() local 345 vacc0123p0 = vmlaq_f32(vacc0123p0, vi16x0123, vk16x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon()
|
D | up8x25-minmax-sse.c | 313 const __m128 vi16x0123 = _mm_loadu_ps(i16); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse() local 319 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi16x0123, vk16x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse() 505 const __m128 vi16x0123 = _mm_loadu_ps(i16); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse() local 509 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi16x0123, vk16x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse() 635 const __m128 vi16x0123 = _mm_loadu_ps(i16); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse() local 637 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi16x0123, vk16x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
|
D | up8x25-minmax-neon.c | 282 const float32x4_t vi16x0123 = vld1q_f32(i16); i16 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neon() local 286 vacc0123p0 = vmlaq_f32(vacc0123p0, vi16x0123, vk16x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon() 422 const float32x4_t vi16x0123 = vld1q_f32(i16); i16 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neon() local 424 vacc0123p0 = vmlaq_f32(vacc0123p0, vi16x0123, vk16x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon() 532 const float32x4_t vi16x0123 = vld1q_f32(i16); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon() local 534 vacc0123p0 = vmlaq_f32(vacc0123p0, vi16x0123, vk16x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon()
|
D | up8x25-minmax-neonfma.c | 282 const float32x4_t vi16x0123 = vld1q_f32(i16); i16 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma() local 286 vacc0123p0 = vfmaq_f32(vacc0123p0, vi16x0123, vk16x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma() 422 const float32x4_t vi16x0123 = vld1q_f32(i16); i16 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma() local 424 vacc0123p0 = vfmaq_f32(vacc0123p0, vi16x0123, vk16x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma() 532 const float32x4_t vi16x0123 = vld1q_f32(i16); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma() local 534 vacc0123p0 = vfmaq_f32(vacc0123p0, vi16x0123, vk16x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma()
|
D | up8x25-wasmsimd.c | 311 const v128_t vi16x0123 = wasm_v128_load(i16); in xnn_f32_dwconv_ukernel_up8x25__wasmsimd() local 317 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi16x0123, vk16x0123)); in xnn_f32_dwconv_ukernel_up8x25__wasmsimd() 501 const v128_t vi16x0123 = wasm_v128_load(i16); in xnn_f32_dwconv_ukernel_up8x25__wasmsimd() local 505 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi16x0123, vk16x0123)); in xnn_f32_dwconv_ukernel_up8x25__wasmsimd() 630 const v128_t vi16x0123 = wasm_v128_load(i16); in xnn_f32_dwconv_ukernel_up8x25__wasmsimd() local 632 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi16x0123, vk16x0123)); in xnn_f32_dwconv_ukernel_up8x25__wasmsimd()
|
D | up8x25-minmax-neonfma-acc2.c | 282 const float32x4_t vi16x0123 = vld1q_f32(i16); i16 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma_acc2() local 286 vacc0123p0 = vfmaq_f32(vacc0123p0, vi16x0123, vk16x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma_acc2() 425 const float32x4_t vi16x0123 = vld1q_f32(i16); i16 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma_acc2() local 427 vacc0123p0 = vfmaq_f32(vacc0123p0, vi16x0123, vk16x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma_acc2() 537 const float32x4_t vi16x0123 = vld1q_f32(i16); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma_acc2() local 539 vacc0123p0 = vfmaq_f32(vacc0123p0, vi16x0123, vk16x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma_acc2()
|
D | up8x25-minmax-wasmsimd-arm.c | 313 const v128_t vi16x0123 = wasm_v128_load(i16); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm() local 319 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi16x0123, vk16x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm() 506 const v128_t vi16x0123 = wasm_v128_load(i16); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm() local 510 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi16x0123, vk16x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm() 636 const v128_t vi16x0123 = wasm_v128_load(i16); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm() local 638 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi16x0123, vk16x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm()
|
D | up8x25-minmax-neon-acc2.c | 282 const float32x4_t vi16x0123 = vld1q_f32(i16); i16 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neon_acc2() local 286 vacc0123p0 = vmlaq_f32(vacc0123p0, vi16x0123, vk16x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon_acc2() 425 const float32x4_t vi16x0123 = vld1q_f32(i16); i16 += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neon_acc2() local 427 vacc0123p0 = vmlaq_f32(vacc0123p0, vi16x0123, vk16x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon_acc2() 537 const float32x4_t vi16x0123 = vld1q_f32(i16); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon_acc2() local 539 vacc0123p0 = vmlaq_f32(vacc0123p0, vi16x0123, vk16x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon_acc2()
|
D | up8x25-minmax-wasmsimd-x86.c | 313 const v128_t vi16x0123 = wasm_v128_load(i16); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86() local 319 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi16x0123, vk16x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86() 506 const v128_t vi16x0123 = wasm_v128_load(i16); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86() local 510 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi16x0123, vk16x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86() 636 const v128_t vi16x0123 = wasm_v128_load(i16); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86() local 638 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi16x0123, vk16x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86()
|
D | up8x25-minmax-sse-acc2.c | 313 const __m128 vi16x0123 = _mm_loadu_ps(i16); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse_acc2() local 319 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi16x0123, vk16x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse_acc2() 508 const __m128 vi16x0123 = _mm_loadu_ps(i16); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse_acc2() local 512 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi16x0123, vk16x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse_acc2() 640 const __m128 vi16x0123 = _mm_loadu_ps(i16); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse_acc2() local 642 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi16x0123, vk16x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse_acc2()
|
D | up8x25-minmax-wasmsimd-x86-acc2.c | 313 const v128_t vi16x0123 = wasm_v128_load(i16); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86_acc2() local 319 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi16x0123, vk16x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86_acc2() 509 const v128_t vi16x0123 = wasm_v128_load(i16); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86_acc2() local 513 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi16x0123, vk16x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86_acc2() 641 const v128_t vi16x0123 = wasm_v128_load(i16); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86_acc2() local 643 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi16x0123, vk16x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86_acc2()
|
D | up8x25-minmax-wasmsimd-arm-acc2.c | 313 const v128_t vi16x0123 = wasm_v128_load(i16); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm_acc2() local 319 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi16x0123, vk16x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm_acc2() 509 const v128_t vi16x0123 = wasm_v128_load(i16); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm_acc2() local 513 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi16x0123, vk16x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm_acc2() 641 const v128_t vi16x0123 = wasm_v128_load(i16); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm_acc2() local 643 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi16x0123, vk16x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm_acc2()
|