/external/XNNPACK/src/f32-gemm/gen/ |
D | 1x8-minmax-neon-lane-ld64.c | 48 const float32x2_t va0 = vld1_f32(a0); a0 += 2; in xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64() local 62 const float32x4_t va0 = vld1q_dup_f32(a0); a0 += 1; in xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64() local
|
D | 1x8-minmax-neonfma-lane-ld64.c | 48 const float32x2_t va0 = vld1_f32(a0); a0 += 2; in xnn_f32_gemm_minmax_ukernel_1x8__neonfma_lane_ld64() local 62 const float32x4_t va0 = vld1q_dup_f32(a0); a0 += 1; in xnn_f32_gemm_minmax_ukernel_1x8__neonfma_lane_ld64() local
|
D | 1x8-minmax-neonfma-dup-ld64.c | 48 const float32x2_t va0 = vld1_f32(a0); a0 += 2; in xnn_f32_gemm_minmax_ukernel_1x8__neonfma_dup_ld64() local 64 const float32x4_t va0 = vld1q_dup_f32(a0); a0 += 1; in xnn_f32_gemm_minmax_ukernel_1x8__neonfma_dup_ld64() local
|
D | 1x8-minmax-neon-dup-ld64.c | 48 const float32x2_t va0 = vld1_f32(a0); a0 += 2; in xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64() local 64 const float32x4_t va0 = vld1q_dup_f32(a0); a0 += 1; in xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64() local
|
D | 1x8s4-minmax-wasmsimd-x86.c | 48 v128_t va0 = wasm_v128_load(a0); in xnn_f32_gemm_minmax_ukernel_1x8s4__wasmsimd_x86() local 88 const v128_t va0 = wasm_v32x4_load_splat(a0); in xnn_f32_gemm_minmax_ukernel_1x8s4__wasmsimd_x86() local
|
D | 1x8s4-minmax-sse.c | 48 __m128 va0 = _mm_loadu_ps(a0); in xnn_f32_gemm_minmax_ukernel_1x8s4__sse() local 88 const __m128 va0 = _mm_load1_ps(a0); in xnn_f32_gemm_minmax_ukernel_1x8s4__sse() local
|
D | 1x8s4-minmax-wasmsimd-arm.c | 50 v128_t va0 = wasm_v128_load(a0); in xnn_f32_gemm_minmax_ukernel_1x8s4__wasmsimd_arm() local 90 const v128_t va0 = wasm_v32x4_load_splat(a0); in xnn_f32_gemm_minmax_ukernel_1x8s4__wasmsimd_arm() local
|
D | 1x16s4-minmax-fma3-broadcast.c | 48 __m256 va0 = _mm256_broadcast_ps((const __m128*) a0); in xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast() local 88 const __m256 va0 = _mm256_broadcast_ss(a0); in xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast() local
|
D | 1x8s4-minmax-neon.c | 48 float32x4_t va0 = vld1q_f32(a0); a0 += 4; in xnn_f32_gemm_minmax_ukernel_1x8s4__neon() local 87 const float32x4_t va0 = vld1q_dup_f32(a0); a0 += 1; in xnn_f32_gemm_minmax_ukernel_1x8s4__neon() local
|
D | 1x8s4-minmax-neonfma.c | 48 float32x4_t va0 = vld1q_f32(a0); a0 += 4; in xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma() local 87 const float32x4_t va0 = vld1q_dup_f32(a0); a0 += 1; in xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma() local
|
D | 1x8-wasmsimd-splat.c | 48 const v128_t va0 = wasm_v128_load(a0); in xnn_f32_gemm_ukernel_1x8__wasmsimd_splat() local 85 const v128_t va0 = wasm_v32x4_load_splat(a0); in xnn_f32_gemm_ukernel_1x8__wasmsimd_splat() local
|
/external/XNNPACK/src/f32-gemm/gen-inc/ |
D | 1x8inc-minmax-neonfma-lane-ld64.c | 50 const float32x2_t va0 = vld1_f32(a0); a0 += 2; in xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64() local 64 const float32x4_t va0 = vld1q_dup_f32(a0); a0 += 1; in xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64() local
|
D | 1x8inc-minmax-neon-lane-ld64.c | 50 const float32x2_t va0 = vld1_f32(a0); a0 += 2; in xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64() local 64 const float32x4_t va0 = vld1q_dup_f32(a0); a0 += 1; in xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64() local
|
D | 1x8inc-minmax-neon-dup-ld64.c | 50 const float32x2_t va0 = vld1_f32(a0); a0 += 2; in xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64() local 66 const float32x4_t va0 = vld1q_dup_f32(a0); a0 += 1; in xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64() local
|
D | 1x8inc-minmax-neonfma-dup-ld64.c | 50 const float32x2_t va0 = vld1_f32(a0); a0 += 2; in xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64() local 66 const float32x4_t va0 = vld1q_dup_f32(a0); a0 += 1; in xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64() local
|
D | 1x8s4inc-minmax-wasmsimd-arm.c | 52 v128_t va0 = wasm_v128_load(a0); in xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm() local 92 const v128_t va0 = wasm_v32x4_load_splat(a0); in xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm() local
|
D | 1x8s4inc-minmax-wasmsimd-x86.c | 50 v128_t va0 = wasm_v128_load(a0); in xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86() local 90 const v128_t va0 = wasm_v32x4_load_splat(a0); in xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86() local
|
D | 1x8s4inc-minmax-sse.c | 50 __m128 va0 = _mm_loadu_ps(a0); in xnn_f32_gemminc_minmax_ukernel_1x8s4__sse() local 90 const __m128 va0 = _mm_load1_ps(a0); in xnn_f32_gemminc_minmax_ukernel_1x8s4__sse() local
|
D | 1x8s4inc-minmax-neon.c | 50 float32x4_t va0 = vld1q_f32(a0); a0 += 4; in xnn_f32_gemminc_minmax_ukernel_1x8s4__neon() local 89 const float32x4_t va0 = vld1q_dup_f32(a0); a0 += 1; in xnn_f32_gemminc_minmax_ukernel_1x8s4__neon() local
|
D | 1x8s4inc-minmax-neonfma.c | 50 float32x4_t va0 = vld1q_f32(a0); a0 += 4; in xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma() local 89 const float32x4_t va0 = vld1q_dup_f32(a0); a0 += 1; in xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma() local
|
/external/XNNPACK/src/f16-gemm/gen/ |
D | 1x8-minmax-neonfp16arith-ld64.c | 49 const float16x4_t va0 = vld1_f16(a0); a0 += 4; in xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64() local 92 const float16x8_t va0 = vld1q_dup_f16(a0); a0 += 1; in xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64() local
|
/external/XNNPACK/src/f32-igemm/gen/ |
D | 1x8-minmax-neon-lane-ld64.c | 61 const float32x2_t va0 = vld1_f32(a0); a0 += 2; in xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64() local 76 const float32x4_t va0 = vld1q_dup_f32(a0); in xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64() local
|
D | 1x8-minmax-neonfma-lane-ld64.c | 61 const float32x2_t va0 = vld1_f32(a0); a0 += 2; in xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64() local 76 const float32x4_t va0 = vld1q_dup_f32(a0); in xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64() local
|
D | 1x8-minmax-neonfma-dup-ld64.c | 61 const float32x2_t va0 = vld1_f32(a0); a0 += 2; in xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64() local 78 const float32x4_t va0 = vld1q_dup_f32(a0); in xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64() local
|
/external/XNNPACK/src/f16-gemm/gen-inc/ |
D | 1x8inc-minmax-neonfp16arith-ld64.c | 51 const float16x4_t va0 = vld1_f16(a0); a0 += 4; in xnn_f16_gemminc_minmax_ukernel_1x8__neonfp16arith_ld64() local 94 const float16x8_t va0 = vld1q_dup_f16(a0); a0 += 1; in xnn_f16_gemminc_minmax_ukernel_1x8__neonfp16arith_ld64() local
|