Home
last modified time | relevance | path

Searched refs:vi89AB (Results 1 – 25 of 96) sorted by relevance

1234

/external/XNNPACK/src/f32-spmm/gen/
D16x1-minmax-wasmsimd-arm-pipelined-x2.c43 v128_t vi89AB = wasm_v128_load(input + 8); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined_x2() local
57 vacc89AB = wasm_f32x4_add(vacc89AB, wasm_f32x4_mul(vi89AB, vw)); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined_x2()
64 vi89AB = wasm_v128_load(input + 8); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined_x2()
68 vacc89AB = wasm_f32x4_add(vacc89AB, wasm_f32x4_mul(vi89AB, vw)); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined_x2()
75 vi89AB = wasm_v128_load(input + 8); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined_x2()
83 vacc89AB = wasm_f32x4_add(vacc89AB, wasm_f32x4_mul(vi89AB, vw)); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined_x2()
91 vi89AB = wasm_v128_load(input + 8); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined_x2()
D16x1-minmax-wasmsimd-x86-pipelined-x2.c43 v128_t vi89AB = wasm_v128_load(input + 8); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined_x2() local
57 vacc89AB = wasm_f32x4_add(vacc89AB, wasm_f32x4_mul(vi89AB, vw)); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined_x2()
64 vi89AB = wasm_v128_load(input + 8); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined_x2()
68 vacc89AB = wasm_f32x4_add(vacc89AB, wasm_f32x4_mul(vi89AB, vw)); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined_x2()
75 vi89AB = wasm_v128_load(input + 8); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined_x2()
83 vacc89AB = wasm_f32x4_add(vacc89AB, wasm_f32x4_mul(vi89AB, vw)); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined_x2()
91 vi89AB = wasm_v128_load(input + 8); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined_x2()
D32x1-minmax-wasmsimd-arm-pipelined-x2.c43 v128_t vi89AB = wasm_v128_load(input + 8); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2() local
65 vacc89AB = wasm_f32x4_add(vacc89AB, wasm_f32x4_mul(vi89AB, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2()
76 vi89AB = wasm_v128_load(input + 8); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2()
84 vacc89AB = wasm_f32x4_add(vacc89AB, wasm_f32x4_mul(vi89AB, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2()
95 vi89AB = wasm_v128_load(input + 8); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2()
107 vacc89AB = wasm_f32x4_add(vacc89AB, wasm_f32x4_mul(vi89AB, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2()
119 vi89AB = wasm_v128_load(input + 8); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2()
175 const v128_t vi89AB = wasm_v128_load(input + 8); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2() local
181 vacc89AB = wasm_f32x4_add(vacc89AB, wasm_f32x4_mul(vi89AB, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2()
D32x1-minmax-wasmsimd-x86-pipelined-x2.c43 v128_t vi89AB = wasm_v128_load(input + 8); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2() local
65 vacc89AB = wasm_f32x4_add(vacc89AB, wasm_f32x4_mul(vi89AB, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2()
76 vi89AB = wasm_v128_load(input + 8); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2()
84 vacc89AB = wasm_f32x4_add(vacc89AB, wasm_f32x4_mul(vi89AB, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2()
95 vi89AB = wasm_v128_load(input + 8); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2()
107 vacc89AB = wasm_f32x4_add(vacc89AB, wasm_f32x4_mul(vi89AB, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2()
119 vi89AB = wasm_v128_load(input + 8); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2()
175 const v128_t vi89AB = wasm_v128_load(input + 8); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2() local
181 vacc89AB = wasm_f32x4_add(vacc89AB, wasm_f32x4_mul(vi89AB, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2()
D32x4-minmax-neonfma.c79 const float32x4_t vi89AB = vld1q_f32(input + 8); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() local
92 vacc89ABn0 = vfmaq_laneq_f32(vacc89ABn0, vi89AB, vw, 0); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma()
100 vacc89ABn1 = vfmaq_laneq_f32(vacc89ABn1, vi89AB, vw, 1); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma()
108 vacc89ABn2 = vfmaq_laneq_f32(vacc89ABn2, vi89AB, vw, 2); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma()
116 vacc89ABn3 = vfmaq_laneq_f32(vacc89ABn3, vi89AB, vw, 3); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma()
246 const float32x4_t vi89AB = vld1q_f32(input + 8); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() local
259 vacc89AB = vfmaq_f32(vacc89AB, vi89AB, vw); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma()
331 const float32x4_t vi89AB = vld1q_f32(input + 8); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() local
338 vacc89ABn0 = vfmaq_laneq_f32(vacc89ABn0, vi89AB, vw, 0); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma()
342 vacc89ABn1 = vfmaq_laneq_f32(vacc89ABn1, vi89AB, vw, 1); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma()
[all …]
D32x1-minmax-wasmsimd-arm-pipelined.c43 v128_t vi89AB = wasm_v128_load(input + 8); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined() local
67 vacc89AB = wasm_f32x4_add(vacc89AB, wasm_f32x4_mul(vi89AB, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined()
79 vi89AB = wasm_v128_load(input + 8); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined()
135 const v128_t vi89AB = wasm_v128_load(input + 8); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined() local
141 vacc89AB = wasm_f32x4_add(vacc89AB, wasm_f32x4_mul(vi89AB, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined()
D32x1-minmax-neon-pipelined.c43 float32x4_t vi89AB = vld1q_f32(input + 8); in xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined() local
65 vacc89AB = vmlaq_f32(vacc89AB, vi89AB, vw); in xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined()
79 vi89AB = vld1q_f32(input + 8); in xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined()
135 const float32x4_t vi89AB = vld1q_f32(input + 8); in xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined() local
144 vacc89AB = vmlaq_f32(vacc89AB, vi89AB, vb); in xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined()
D32x1-minmax-neonfma-pipelined.c43 float32x4_t vi89AB = vld1q_f32(input + 8); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined() local
65 vacc89AB = vfmaq_f32(vacc89AB, vi89AB, vw); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined()
79 vi89AB = vld1q_f32(input + 8); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined()
135 const float32x4_t vi89AB = vld1q_f32(input + 8); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined() local
144 vacc89AB = vfmaq_f32(vacc89AB, vi89AB, vb); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined()
D32x2-minmax-neonfma.c63 const float32x4_t vi89AB = vld1q_f32(input + 8); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() local
76 vacc89ABn0 = vfmaq_lane_f32(vacc89ABn0, vi89AB, vw, 0); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma()
84 vacc89ABn1 = vfmaq_lane_f32(vacc89ABn1, vi89AB, vw, 1); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma()
164 const float32x4_t vi89AB = vld1q_f32(input + 8); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() local
177 vacc89AB = vfmaq_f32(vacc89AB, vi89AB, vw); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma()
241 const float32x4_t vi89AB = vld1q_f32(input + 8); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() local
248 vacc89ABn0 = vfmaq_lane_f32(vacc89ABn0, vi89AB, vw, 0); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma()
252 vacc89ABn1 = vfmaq_lane_f32(vacc89ABn1, vi89AB, vw, 1); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma()
300 const float32x4_t vi89AB = vld1q_f32(input + 8); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() local
306 vacc89AB = vfmaq_f32(vacc89AB, vi89AB, vw); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma()
D32x1-minmax-wasmsimd-x86-pipelined.c43 v128_t vi89AB = wasm_v128_load(input + 8); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined() local
67 vacc89AB = wasm_f32x4_add(vacc89AB, wasm_f32x4_mul(vi89AB, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined()
79 vi89AB = wasm_v128_load(input + 8); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined()
135 const v128_t vi89AB = wasm_v128_load(input + 8); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined() local
141 vacc89AB = wasm_f32x4_add(vacc89AB, wasm_f32x4_mul(vi89AB, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined()
D12x4-minmax-neonfma.c59 const float32x4_t vi89AB = vld1q_f32(input + 8); in xnn_f32_spmm_minmax_ukernel_12x4__neonfma() local
66 vacc89ABn0 = vfmaq_laneq_f32(vacc89ABn0, vi89AB, vw, 0); in xnn_f32_spmm_minmax_ukernel_12x4__neonfma()
69 vacc89ABn1 = vfmaq_laneq_f32(vacc89ABn1, vi89AB, vw, 1); in xnn_f32_spmm_minmax_ukernel_12x4__neonfma()
72 vacc89ABn2 = vfmaq_laneq_f32(vacc89ABn2, vi89AB, vw, 2); in xnn_f32_spmm_minmax_ukernel_12x4__neonfma()
75 vacc89ABn3 = vfmaq_laneq_f32(vacc89ABn3, vi89AB, vw, 3); in xnn_f32_spmm_minmax_ukernel_12x4__neonfma()
135 const float32x4_t vi89AB = vld1q_f32(input + 8); in xnn_f32_spmm_minmax_ukernel_12x4__neonfma() local
142 vacc89AB = vfmaq_f32(vacc89AB, vi89AB, vw); in xnn_f32_spmm_minmax_ukernel_12x4__neonfma()
D32x1-minmax-neon.c55 const float32x4_t vi89AB = vld1q_f32(input + 8); in xnn_f32_spmm_minmax_ukernel_32x1__neon() local
68 vacc89AB = vmlaq_f32(vacc89AB, vi89AB, vw); in xnn_f32_spmm_minmax_ukernel_32x1__neon()
124 const float32x4_t vi89AB = vld1q_f32(input + 8); in xnn_f32_spmm_minmax_ukernel_32x1__neon() local
130 vacc89AB = vmlaq_f32(vacc89AB, vi89AB, vw); in xnn_f32_spmm_minmax_ukernel_32x1__neon()
D32x1-minmax-neonfma.c55 const float32x4_t vi89AB = vld1q_f32(input + 8); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma() local
68 vacc89AB = vfmaq_f32(vacc89AB, vi89AB, vw); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma()
124 const float32x4_t vi89AB = vld1q_f32(input + 8); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma() local
130 vacc89AB = vfmaq_f32(vacc89AB, vi89AB, vw); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma()
D16x4-minmax-neonfma.c63 const float32x4_t vi89AB = vld1q_f32(input + 8); in xnn_f32_spmm_minmax_ukernel_16x4__neonfma() local
71 vacc89ABn0 = vfmaq_laneq_f32(vacc89ABn0, vi89AB, vw, 0); in xnn_f32_spmm_minmax_ukernel_16x4__neonfma()
75 vacc89ABn1 = vfmaq_laneq_f32(vacc89ABn1, vi89AB, vw, 1); in xnn_f32_spmm_minmax_ukernel_16x4__neonfma()
79 vacc89ABn2 = vfmaq_laneq_f32(vacc89ABn2, vi89AB, vw, 2); in xnn_f32_spmm_minmax_ukernel_16x4__neonfma()
83 vacc89ABn3 = vfmaq_laneq_f32(vacc89ABn3, vi89AB, vw, 3); in xnn_f32_spmm_minmax_ukernel_16x4__neonfma()
157 const float32x4_t vi89AB = vld1q_f32(input + 8); in xnn_f32_spmm_minmax_ukernel_16x4__neonfma() local
165 vacc89AB = vfmaq_f32(vacc89AB, vi89AB, vw); in xnn_f32_spmm_minmax_ukernel_16x4__neonfma()
D32x1-minmax-wasmsimd-arm.c55 const v128_t vi89AB = wasm_v128_load(input + 8); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm() local
65 vacc89AB = wasm_f32x4_add(vacc89AB, wasm_f32x4_mul(vi89AB, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm()
121 const v128_t vi89AB = wasm_v128_load(input + 8); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm() local
127 vacc89AB = wasm_f32x4_add(vacc89AB, wasm_f32x4_mul(vi89AB, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm()
D32x1-minmax-sse.c55 const __m128 vi89AB = _mm_loadu_ps(input + 8); in xnn_f32_spmm_minmax_ukernel_32x1__sse() local
65 vacc89AB = _mm_add_ps(vacc89AB, _mm_mul_ps(vi89AB, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__sse()
121 const __m128 vi89AB = _mm_loadu_ps(input + 8); in xnn_f32_spmm_minmax_ukernel_32x1__sse() local
127 vacc89AB = _mm_add_ps(vacc89AB, _mm_mul_ps(vi89AB, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__sse()
D12x2-minmax-neonfma.c53 const float32x4_t vi89AB = vld1q_f32(input + 8); in xnn_f32_spmm_minmax_ukernel_12x2__neonfma() local
60 vacc89ABn0 = vfmaq_lane_f32(vacc89ABn0, vi89AB, vw, 0); in xnn_f32_spmm_minmax_ukernel_12x2__neonfma()
63 vacc89ABn1 = vfmaq_lane_f32(vacc89ABn1, vi89AB, vw, 1); in xnn_f32_spmm_minmax_ukernel_12x2__neonfma()
103 const float32x4_t vi89AB = vld1q_f32(input + 8); in xnn_f32_spmm_minmax_ukernel_12x2__neonfma() local
110 vacc89AB = vfmaq_f32(vacc89AB, vi89AB, vw); in xnn_f32_spmm_minmax_ukernel_12x2__neonfma()
D16x1-minmax-neon-pipelined.c43 float32x4_t vi89AB = vld1q_f32(input + 8); in xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined() local
57 vacc89AB = vmlaq_f32(vacc89AB, vi89AB, vw); in xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined()
66 vi89AB = vld1q_f32(input + 8); in xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined()
D16x1-minmax-neonfma-pipelined.c43 float32x4_t vi89AB = vld1q_f32(input + 8); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined() local
57 vacc89AB = vfmaq_f32(vacc89AB, vi89AB, vw); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined()
66 vi89AB = vld1q_f32(input + 8); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined()
D16x1-minmax-wasmsimd-arm-pipelined.c43 v128_t vi89AB = wasm_v128_load(input + 8); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined() local
59 vacc89AB = wasm_f32x4_add(vacc89AB, wasm_f32x4_mul(vi89AB, vw)); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined()
67 vi89AB = wasm_v128_load(input + 8); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined()
D16x1-minmax-wasmsimd-x86-pipelined.c43 v128_t vi89AB = wasm_v128_load(input + 8); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined() local
59 vacc89AB = wasm_f32x4_add(vacc89AB, wasm_f32x4_mul(vi89AB, vw)); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined()
67 vi89AB = wasm_v128_load(input + 8); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_x86_pipelined()
D32x1-minmax-wasmsimd-x86.c55 const v128_t vi89AB = wasm_v128_load(input + 8); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86() local
65 vacc89AB = wasm_f32x4_add(vacc89AB, wasm_f32x4_mul(vi89AB, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86()
121 const v128_t vi89AB = wasm_v128_load(input + 8); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86() local
127 vacc89AB = wasm_f32x4_add(vacc89AB, wasm_f32x4_mul(vi89AB, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86()
D16x2-minmax-neonfma.c55 const float32x4_t vi89AB = vld1q_f32(input + 8); in xnn_f32_spmm_minmax_ukernel_16x2__neonfma() local
63 vacc89ABn0 = vfmaq_lane_f32(vacc89ABn0, vi89AB, vw, 0); in xnn_f32_spmm_minmax_ukernel_16x2__neonfma()
67 vacc89ABn1 = vfmaq_lane_f32(vacc89ABn1, vi89AB, vw, 1); in xnn_f32_spmm_minmax_ukernel_16x2__neonfma()
115 const float32x4_t vi89AB = vld1q_f32(input + 8); in xnn_f32_spmm_minmax_ukernel_16x2__neonfma() local
123 vacc89AB = vfmaq_f32(vacc89AB, vi89AB, vw); in xnn_f32_spmm_minmax_ukernel_16x2__neonfma()
D32x1-minmax-neon-x2.c126 const float32x4_t vi89AB = vld1q_f32(input + 8); in xnn_f32_spmm_minmax_ukernel_32x1__neon_x2() local
139 vacc89AB = vmlaq_f32(vacc89AB, vi89AB, vw); in xnn_f32_spmm_minmax_ukernel_32x1__neon_x2()
195 const float32x4_t vi89AB = vld1q_f32(input + 8); in xnn_f32_spmm_minmax_ukernel_32x1__neon_x2() local
201 vacc89AB = vmlaq_f32(vacc89AB, vi89AB, vw); in xnn_f32_spmm_minmax_ukernel_32x1__neon_x2()
D32x1-minmax-neonfma-x2.c126 const float32x4_t vi89AB = vld1q_f32(input + 8); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2() local
139 vacc89AB = vfmaq_f32(vacc89AB, vi89AB, vw); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2()
195 const float32x4_t vi89AB = vld1q_f32(input + 8); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2() local
201 vacc89AB = vfmaq_f32(vacc89AB, vi89AB, vw); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_x2()

1234