
Searched refs:vacc4x4567 (Results 1 – 25 of 104) sorted by relevance
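All of the hits below follow the same accumulator lifecycle inside an f32 GEMM/IGEMM minmax microkernel: vacc4x4567 holds columns 4..7 of output row 4; it is initialized (loaded from the acc buffer in the -inc variants, or copied from vacc0x4567 otherwise), accumulates multiply-adds over the reduction dimension, is clamped to [vmin, vmax], and is stored at c4 + 4. The scalar sketch below mirrors that shape for a single lane; the function name, signature, and weight layout are illustrative assumptions, not the XNNPACK API.

#include <stddef.h>

/* Hypothetical scalar stand-in for one lane of vacc4x4567 (not XNNPACK code). */
static void gemm_row4_cols4to7_sketch(
    size_t kc,               /* reduction length                                */
    const float* a4,         /* row 4 of the input                              */
    const float* w,          /* packed weights for one output column, one per k */
    float* c4,               /* output row 4; columns 4..7 start at c4 + 4      */
    float vmin, float vmax)  /* output clamp parameters                         */
{
  float vacc4x4567 = 0.0f;   /* the -inc kernels instead load this from 'acc' */
  for (size_t k = 0; k < kc; k++) {
    /* wasm_f32x4_add(.., wasm_f32x4_mul(..)), vmlaq_/vfmaq_lane_f32, _mm_add_ps(_mm_mul_ps(..)) */
    vacc4x4567 += a4[k] * w[k];
  }
  vacc4x4567 = vacc4x4567 < vmin ? vmin : vacc4x4567;  /* clamp to vmin: wasm_f32x4_max / vmaxq_f32 / _mm_max_ps / bitselect */
  vacc4x4567 = vacc4x4567 > vmax ? vmax : vacc4x4567;  /* clamp to vmax: wasm_f32x4_min / vminq_f32 / _mm_min_ps / bitselect */
  c4[4] = vacc4x4567;        /* wasm_v128_store(c4 + 4, ..) / vst1q_f32 / _mm_storeu_ps */
}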


/external/XNNPACK/src/f32-gemm/gen-inc/
5x8s4inc-minmax-wasmsimd-x86.c
77 v128_t vacc4x4567 = wasm_v128_load(acc + 36); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86() local
106 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c0)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86()
126 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c1)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86()
146 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c2)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86()
166 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86()
198 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86()
214 vacc4x4567 = wasm_v128_bitselect(vmin, vacc4x4567, wasm_f32x4_lt(vacc4x4567, vmin)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86()
226 vacc4x4567 = wasm_v128_bitselect(vacc4x4567, vmax, wasm_f32x4_le(vacc4x4567, vmax)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86()
230 wasm_v128_store(c4 + 4, vacc4x4567); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86()
260 vacc4x0123 = vacc4x4567; in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86()
5x8inc-minmax-wasmsimd-x86-splat.c
77 v128_t vacc4x4567 = wasm_v128_load(acc + 36); in xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat() local
111 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4c0, vb4567c0)); in xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat()
130 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4c1, vb4567c1)); in xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat()
149 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4c2, vb4567c2)); in xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat()
168 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4c3, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat()
199 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat()
215 vacc4x4567 = wasm_v128_bitselect(vmin, vacc4x4567, wasm_f32x4_lt(vacc4x4567, vmin)); in xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat()
227 vacc4x4567 = wasm_v128_bitselect(vacc4x4567, vmax, wasm_f32x4_le(vacc4x4567, vmax)); in xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat()
231 wasm_v128_store(c4 + 4, vacc4x4567); in xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat()
261 vacc4x0123 = vacc4x4567; in xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_x86_splat()
5x8inc-minmax-neon-lane-ld64.c
78 float32x4_t vacc4x4567 = vld1q_f32(acc); acc += 4; in xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64() local
100 vacc4x4567 = vmlaq_lane_f32(vacc4x4567, vb4567c0, va4, 0); in xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64()
113 vacc4x4567 = vmlaq_lane_f32(vacc4x4567, vb4567c1, va4, 1); in xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64()
134 vacc4x4567 = vmlaq_f32(vacc4x4567, va4, vb4567); in xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64()
146 vacc4x4567 = vminq_f32(vacc4x4567, vmax); in xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64()
158 vacc4x4567 = vmaxq_f32(vacc4x4567, vmin); in xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64()
162 vst1q_f32(c4 + 4, vacc4x4567); in xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64()
193 vacc4x0123 = vacc4x4567; in xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64()
5x8inc-minmax-neonfma-lane-ld64.c
78 float32x4_t vacc4x4567 = vld1q_f32(acc); acc += 4; in xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64() local
100 vacc4x4567 = vfmaq_lane_f32(vacc4x4567, vb4567c0, va4, 0); in xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64()
113 vacc4x4567 = vfmaq_lane_f32(vacc4x4567, vb4567c1, va4, 1); in xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64()
134 vacc4x4567 = vfmaq_f32(vacc4x4567, va4, vb4567); in xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64()
146 vacc4x4567 = vminq_f32(vacc4x4567, vmax); in xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64()
158 vacc4x4567 = vmaxq_f32(vacc4x4567, vmin); in xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64()
162 vst1q_f32(c4 + 4, vacc4x4567); in xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64()
193 vacc4x0123 = vacc4x4567; in xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64()
5x8inc-minmax-wasmsimd-arm-splat.c
79 v128_t vacc4x4567 = wasm_v128_load(acc + 36); in xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat() local
113 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4c0, vb4567c0)); in xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat()
132 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4c1, vb4567c1)); in xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat()
151 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4c2, vb4567c2)); in xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat()
170 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4c3, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat()
201 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat()
216 vacc4x4567 = wasm_f32x4_max(vacc4x4567, vmin); in xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat()
227 vacc4x4567 = wasm_f32x4_min(vacc4x4567, vmax); in xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat()
231 wasm_v128_store(c4 + 4, vacc4x4567); in xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat()
261 vacc4x0123 = vacc4x4567; in xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_splat()
5x8s4inc-minmax-wasmsimd-arm.c
79 v128_t vacc4x4567 = wasm_v128_load(acc + 36); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm() local
108 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c0)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm()
128 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c1)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm()
148 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c2)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm()
168 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm()
200 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm()
215 vacc4x4567 = wasm_f32x4_max(vacc4x4567, vmin); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm()
226 vacc4x4567 = wasm_f32x4_min(vacc4x4567, vmax); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm()
230 wasm_v128_store(c4 + 4, vacc4x4567); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm()
260 vacc4x0123 = vacc4x4567; in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm()
5x8s4inc-minmax-sse.c
77 __m128 vacc4x4567 = _mm_load_ps(acc + 36); in xnn_f32_gemminc_minmax_ukernel_5x8s4__sse() local
106 vacc4x4567 = _mm_add_ps(vacc4x4567, _mm_mul_ps(va4, vb4567c0)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__sse()
126 vacc4x4567 = _mm_add_ps(vacc4x4567, _mm_mul_ps(va4, vb4567c1)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__sse()
146 vacc4x4567 = _mm_add_ps(vacc4x4567, _mm_mul_ps(va4, vb4567c2)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__sse()
166 vacc4x4567 = _mm_add_ps(vacc4x4567, _mm_mul_ps(va4, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__sse()
198 vacc4x4567 = _mm_add_ps(vacc4x4567, _mm_mul_ps(va4, vb4567)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__sse()
214 vacc4x4567 = _mm_min_ps(vacc4x4567, vmax); in xnn_f32_gemminc_minmax_ukernel_5x8s4__sse()
226 vacc4x4567 = _mm_max_ps(vacc4x4567, vmin); in xnn_f32_gemminc_minmax_ukernel_5x8s4__sse()
230 _mm_storeu_ps(c4 + 4, vacc4x4567); in xnn_f32_gemminc_minmax_ukernel_5x8s4__sse()
260 vacc4x0123 = vacc4x4567; in xnn_f32_gemminc_minmax_ukernel_5x8s4__sse()
6x8s4inc-minmax-wasmsimd-x86.c
83 v128_t vacc4x4567 = wasm_v128_load(acc + 36); in xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86() local
117 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c0)); in xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86()
140 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c1)); in xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86()
163 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c2)); in xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86()
186 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86()
222 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86()
240 vacc4x4567 = wasm_v128_bitselect(vmin, vacc4x4567, wasm_f32x4_lt(vacc4x4567, vmin)); in xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86()
254 vacc4x4567 = wasm_v128_bitselect(vacc4x4567, vmax, wasm_f32x4_le(vacc4x4567, vmax)); in xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86()
262 wasm_v128_store(c4 + 4, vacc4x4567); in xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86()
295 vacc4x0123 = vacc4x4567; in xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86()
6x8inc-minmax-wasmsimd-x86-splat.c
83 v128_t vacc4x4567 = wasm_v128_load(acc + 36); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat() local
123 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4c0, vb4567c0)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat()
145 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4c1, vb4567c1)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat()
167 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4c2, vb4567c2)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat()
189 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4c3, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat()
224 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat()
242 vacc4x4567 = wasm_v128_bitselect(vmin, vacc4x4567, wasm_f32x4_lt(vacc4x4567, vmin)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat()
256 vacc4x4567 = wasm_v128_bitselect(vacc4x4567, vmax, wasm_f32x4_le(vacc4x4567, vmax)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat()
264 wasm_v128_store(c4 + 4, vacc4x4567); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat()
297 vacc4x0123 = vacc4x4567; in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat()
/external/XNNPACK/src/f32-gemm/gen/
5x8s4-minmax-wasmsimd-x86.c
75 v128_t vacc4x4567 = vacc0x4567; in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_x86() local
104 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c0)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_x86()
124 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c1)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_x86()
144 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c2)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_x86()
164 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_x86()
196 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_x86()
212 vacc4x4567 = wasm_v128_bitselect(vmin, vacc4x4567, wasm_f32x4_lt(vacc4x4567, vmin)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_x86()
224 vacc4x4567 = wasm_v128_bitselect(vacc4x4567, vmax, wasm_f32x4_le(vacc4x4567, vmax)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_x86()
228 wasm_v128_store(c4 + 4, vacc4x4567); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_x86()
258 vacc4x0123 = vacc4x4567; in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_x86()
5x8s4-minmax-wasmsimd-arm.c
77 v128_t vacc4x4567 = vacc0x4567; in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm() local
106 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c0)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm()
126 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c1)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm()
146 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c2)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm()
166 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm()
198 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm()
213 vacc4x4567 = wasm_f32x4_max(vacc4x4567, vmin); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm()
224 vacc4x4567 = wasm_f32x4_min(vacc4x4567, vmax); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm()
228 wasm_v128_store(c4 + 4, vacc4x4567); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm()
258 vacc4x0123 = vacc4x4567; in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm()
5x8-minmax-wasmsimd-x86-splat.c
75 v128_t vacc4x4567 = vacc0x4567; in xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_x86_splat() local
109 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4c0, vb4567c0)); in xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_x86_splat()
128 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4c1, vb4567c1)); in xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_x86_splat()
147 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4c2, vb4567c2)); in xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_x86_splat()
166 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4c3, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_x86_splat()
197 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_x86_splat()
213 vacc4x4567 = wasm_v128_bitselect(vmin, vacc4x4567, wasm_f32x4_lt(vacc4x4567, vmin)); in xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_x86_splat()
225 vacc4x4567 = wasm_v128_bitselect(vacc4x4567, vmax, wasm_f32x4_le(vacc4x4567, vmax)); in xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_x86_splat()
229 wasm_v128_store(c4 + 4, vacc4x4567); in xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_x86_splat()
259 vacc4x0123 = vacc4x4567; in xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_x86_splat()
5x8-minmax-neonfma-lane-ld64.c
76 float32x4_t vacc4x4567 = vacc0x4567; in xnn_f32_gemm_minmax_ukernel_5x8__neonfma_lane_ld64() local
98 vacc4x4567 = vfmaq_lane_f32(vacc4x4567, vb4567c0, va4, 0); in xnn_f32_gemm_minmax_ukernel_5x8__neonfma_lane_ld64()
111 vacc4x4567 = vfmaq_lane_f32(vacc4x4567, vb4567c1, va4, 1); in xnn_f32_gemm_minmax_ukernel_5x8__neonfma_lane_ld64()
132 vacc4x4567 = vfmaq_f32(vacc4x4567, va4, vb4567); in xnn_f32_gemm_minmax_ukernel_5x8__neonfma_lane_ld64()
144 vacc4x4567 = vminq_f32(vacc4x4567, vmax); in xnn_f32_gemm_minmax_ukernel_5x8__neonfma_lane_ld64()
156 vacc4x4567 = vmaxq_f32(vacc4x4567, vmin); in xnn_f32_gemm_minmax_ukernel_5x8__neonfma_lane_ld64()
160 vst1q_f32(c4 + 4, vacc4x4567); in xnn_f32_gemm_minmax_ukernel_5x8__neonfma_lane_ld64()
191 vacc4x0123 = vacc4x4567; in xnn_f32_gemm_minmax_ukernel_5x8__neonfma_lane_ld64()
5x8-minmax-neon-lane-ld64.c
76 float32x4_t vacc4x4567 = vacc0x4567; in xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64() local
98 vacc4x4567 = vmlaq_lane_f32(vacc4x4567, vb4567c0, va4, 0); in xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64()
111 vacc4x4567 = vmlaq_lane_f32(vacc4x4567, vb4567c1, va4, 1); in xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64()
132 vacc4x4567 = vmlaq_f32(vacc4x4567, va4, vb4567); in xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64()
144 vacc4x4567 = vminq_f32(vacc4x4567, vmax); in xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64()
156 vacc4x4567 = vmaxq_f32(vacc4x4567, vmin); in xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64()
160 vst1q_f32(c4 + 4, vacc4x4567); in xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64()
191 vacc4x0123 = vacc4x4567; in xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64()
5x8-minmax-sse-dup.c
75 __m128 vacc4x4567 = vacc0x4567; in xnn_f32_gemm_minmax_ukernel_5x8__sse_dup() local
110 vacc4x4567 = _mm_add_ps(vacc4x4567, _mm_mul_ps(va4c0000, vb4567c0)); in xnn_f32_gemm_minmax_ukernel_5x8__sse_dup()
130 vacc4x4567 = _mm_add_ps(vacc4x4567, _mm_mul_ps(va4c1111, vb4567c1)); in xnn_f32_gemm_minmax_ukernel_5x8__sse_dup()
150 vacc4x4567 = _mm_add_ps(vacc4x4567, _mm_mul_ps(va4c2222, vb4567c2)); in xnn_f32_gemm_minmax_ukernel_5x8__sse_dup()
170 vacc4x4567 = _mm_add_ps(vacc4x4567, _mm_mul_ps(va4c3333, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8__sse_dup()
201 vacc4x4567 = _mm_add_ps(vacc4x4567, _mm_mul_ps(va4, vb4567)); in xnn_f32_gemm_minmax_ukernel_5x8__sse_dup()
217 vacc4x4567 = _mm_min_ps(vacc4x4567, vmax); in xnn_f32_gemm_minmax_ukernel_5x8__sse_dup()
229 vacc4x4567 = _mm_max_ps(vacc4x4567, vmin); in xnn_f32_gemm_minmax_ukernel_5x8__sse_dup()
233 _mm_storeu_ps(c4 + 4, vacc4x4567); in xnn_f32_gemm_minmax_ukernel_5x8__sse_dup()
263 vacc4x0123 = vacc4x4567; in xnn_f32_gemm_minmax_ukernel_5x8__sse_dup()
5x8-minmax-wasmsimd-arm-splat.c
77 v128_t vacc4x4567 = vacc0x4567; in xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_splat() local
111 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4c0, vb4567c0)); in xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_splat()
130 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4c1, vb4567c1)); in xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_splat()
149 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4c2, vb4567c2)); in xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_splat()
168 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4c3, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_splat()
199 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_splat()
214 vacc4x4567 = wasm_f32x4_max(vacc4x4567, vmin); in xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_splat()
225 vacc4x4567 = wasm_f32x4_min(vacc4x4567, vmax); in xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_splat()
229 wasm_v128_store(c4 + 4, vacc4x4567); in xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_splat()
259 vacc4x0123 = vacc4x4567; in xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_splat()
5x8s4-minmax-sse.c
75 __m128 vacc4x4567 = vacc0x4567; in xnn_f32_gemm_minmax_ukernel_5x8s4__sse() local
104 vacc4x4567 = _mm_add_ps(vacc4x4567, _mm_mul_ps(va4, vb4567c0)); in xnn_f32_gemm_minmax_ukernel_5x8s4__sse()
124 vacc4x4567 = _mm_add_ps(vacc4x4567, _mm_mul_ps(va4, vb4567c1)); in xnn_f32_gemm_minmax_ukernel_5x8s4__sse()
144 vacc4x4567 = _mm_add_ps(vacc4x4567, _mm_mul_ps(va4, vb4567c2)); in xnn_f32_gemm_minmax_ukernel_5x8s4__sse()
164 vacc4x4567 = _mm_add_ps(vacc4x4567, _mm_mul_ps(va4, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__sse()
196 vacc4x4567 = _mm_add_ps(vacc4x4567, _mm_mul_ps(va4, vb4567)); in xnn_f32_gemm_minmax_ukernel_5x8s4__sse()
212 vacc4x4567 = _mm_min_ps(vacc4x4567, vmax); in xnn_f32_gemm_minmax_ukernel_5x8s4__sse()
224 vacc4x4567 = _mm_max_ps(vacc4x4567, vmin); in xnn_f32_gemm_minmax_ukernel_5x8s4__sse()
228 _mm_storeu_ps(c4 + 4, vacc4x4567); in xnn_f32_gemm_minmax_ukernel_5x8s4__sse()
258 vacc4x0123 = vacc4x4567; in xnn_f32_gemm_minmax_ukernel_5x8s4__sse()
6x8-minmax-wasmsimd-x86-splat.c
81 v128_t vacc4x4567 = vacc0x4567; in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat() local
121 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4c0, vb4567c0)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat()
143 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4c1, vb4567c1)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat()
165 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4c2, vb4567c2)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat()
187 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4c3, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat()
222 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat()
240 vacc4x4567 = wasm_v128_bitselect(vmin, vacc4x4567, wasm_f32x4_lt(vacc4x4567, vmin)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat()
254 vacc4x4567 = wasm_v128_bitselect(vacc4x4567, vmax, wasm_f32x4_le(vacc4x4567, vmax)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat()
262 wasm_v128_store(c4 + 4, vacc4x4567); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat()
295 vacc4x0123 = vacc4x4567; in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat()
6x8s4-minmax-wasmsimd-x86.c
81 v128_t vacc4x4567 = vacc0x4567; in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86() local
115 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c0)); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86()
138 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c1)); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86()
161 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c2)); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86()
184 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86()
220 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86()
238 vacc4x4567 = wasm_v128_bitselect(vmin, vacc4x4567, wasm_f32x4_lt(vacc4x4567, vmin)); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86()
252 vacc4x4567 = wasm_v128_bitselect(vacc4x4567, vmax, wasm_f32x4_le(vacc4x4567, vmax)); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86()
260 wasm_v128_store(c4 + 4, vacc4x4567); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86()
293 vacc4x0123 = vacc4x4567; in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86()
/external/XNNPACK/src/f32-igemm/gen/
5x8-minmax-wasmsimd-x86-splat.c
71 v128_t vacc4x4567 = vacc0x4567; in xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat() local
134 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4c0, vb4567c0)); in xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat()
153 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4c1, vb4567c1)); in xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat()
172 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4c2, vb4567c2)); in xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat()
191 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4c3, vb4567c3)); in xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat()
222 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat()
239 vacc4x4567 = wasm_v128_bitselect(vmin, vacc4x4567, wasm_f32x4_lt(vacc4x4567, vmin)); in xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat()
251 vacc4x4567 = wasm_v128_bitselect(vacc4x4567, vmax, wasm_f32x4_le(vacc4x4567, vmax)); in xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat()
255 wasm_v128_store(c4 + 4, vacc4x4567); in xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat()
280 vacc4x0123 = vacc4x4567; in xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat()
5x8s4-minmax-wasmsimd-x86.c
71 v128_t vacc4x4567 = vacc0x4567; in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86() local
129 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c0)); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86()
149 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c1)); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86()
169 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c2)); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86()
189 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c3)); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86()
221 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86()
238 vacc4x4567 = wasm_v128_bitselect(vmin, vacc4x4567, wasm_f32x4_lt(vacc4x4567, vmin)); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86()
250 vacc4x4567 = wasm_v128_bitselect(vacc4x4567, vmax, wasm_f32x4_le(vacc4x4567, vmax)); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86()
254 wasm_v128_store(c4 + 4, vacc4x4567); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86()
279 vacc4x0123 = vacc4x4567; in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86()
5x8s4-minmax-wasmsimd-arm.c
73 v128_t vacc4x4567 = vacc0x4567; in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm() local
131 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c0)); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm()
151 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c1)); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm()
171 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c2)); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm()
191 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c3)); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm()
223 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm()
239 vacc4x4567 = wasm_f32x4_max(vacc4x4567, vmin); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm()
250 vacc4x4567 = wasm_f32x4_min(vacc4x4567, vmax); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm()
254 wasm_v128_store(c4 + 4, vacc4x4567); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm()
279 vacc4x0123 = vacc4x4567; in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm()
5x8s4-minmax-sse.c
71 __m128 vacc4x4567 = vacc0x4567; in xnn_f32_igemm_minmax_ukernel_5x8s4__sse() local
129 vacc4x4567 = _mm_add_ps(vacc4x4567, _mm_mul_ps(va4, vb4567c0)); in xnn_f32_igemm_minmax_ukernel_5x8s4__sse()
149 vacc4x4567 = _mm_add_ps(vacc4x4567, _mm_mul_ps(va4, vb4567c1)); in xnn_f32_igemm_minmax_ukernel_5x8s4__sse()
169 vacc4x4567 = _mm_add_ps(vacc4x4567, _mm_mul_ps(va4, vb4567c2)); in xnn_f32_igemm_minmax_ukernel_5x8s4__sse()
189 vacc4x4567 = _mm_add_ps(vacc4x4567, _mm_mul_ps(va4, vb4567c3)); in xnn_f32_igemm_minmax_ukernel_5x8s4__sse()
221 vacc4x4567 = _mm_add_ps(vacc4x4567, _mm_mul_ps(va4, vb4567)); in xnn_f32_igemm_minmax_ukernel_5x8s4__sse()
238 vacc4x4567 = _mm_min_ps(vacc4x4567, vmax); in xnn_f32_igemm_minmax_ukernel_5x8s4__sse()
250 vacc4x4567 = _mm_max_ps(vacc4x4567, vmin); in xnn_f32_igemm_minmax_ukernel_5x8s4__sse()
254 _mm_storeu_ps(c4 + 4, vacc4x4567); in xnn_f32_igemm_minmax_ukernel_5x8s4__sse()
279 vacc4x0123 = vacc4x4567; in xnn_f32_igemm_minmax_ukernel_5x8s4__sse()
6x8s4-minmax-wasmsimd-x86.c
75 v128_t vacc4x4567 = vacc0x4567; in xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86() local
143 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c0)); in xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86()
166 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c1)); in xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86()
189 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c2)); in xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86()
212 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c3)); in xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86()
247 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86()
267 vacc4x4567 = wasm_v128_bitselect(vmin, vacc4x4567, wasm_f32x4_lt(vacc4x4567, vmin)); in xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86()
281 vacc4x4567 = wasm_v128_bitselect(vacc4x4567, vmax, wasm_f32x4_le(vacc4x4567, vmax)); in xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86()
289 wasm_v128_store(c4 + 4, vacc4x4567); in xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86()
316 vacc4x0123 = vacc4x4567; in xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86()
6x8-minmax-wasmsimd-x86-splat.c
75 v128_t vacc4x4567 = vacc0x4567; in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat() local
149 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4c0, vb4567c0)); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat()
171 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4c1, vb4567c1)); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat()
193 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4c2, vb4567c2)); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat()
215 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4c3, vb4567c3)); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat()
249 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat()
269 vacc4x4567 = wasm_v128_bitselect(vmin, vacc4x4567, wasm_f32x4_lt(vacc4x4567, vmin)); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat()
283 vacc4x4567 = wasm_v128_bitselect(vacc4x4567, vmax, wasm_f32x4_le(vacc4x4567, vmax)); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat()
291 wasm_v128_store(c4 + 4, vacc4x4567); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat()
318 vacc4x0123 = vacc4x4567; in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat()
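The trailing "vacc4x0123 = vacc4x4567" line in every excerpt belongs to the partial-width output path: when fewer than eight columns remain, the kernel stores the low four columns and shifts the upper accumulator into the lower slot so the narrower tail stores can keep working out of vacc4x0123. A minimal scalar sketch of that store/shift is shown below; the function name and array layout are assumptions for illustration, not the XNNPACK source.

#include <stddef.h>
#include <string.h>

/* Hypothetical remainder handling for row 4; mirrors the store/shift pattern above. */
static void store_row4_sketch(float* c4, size_t nc,
                              float vacc4x0123[4], const float vacc4x4567[4])
{
  if (nc >= 8) {
    memcpy(c4, vacc4x0123, 4 * sizeof(float));
    memcpy(c4 + 4, vacc4x4567, 4 * sizeof(float));      /* wasm_v128_store(c4 + 4, vacc4x4567) */
  } else {
    if (nc & 4) {
      memcpy(c4, vacc4x0123, 4 * sizeof(float));
      memcpy(vacc4x0123, vacc4x4567, 4 * sizeof(float)); /* vacc4x0123 = vacc4x4567 */
      c4 += 4;
    }
    /* ... the real kernels continue with nc & 2 and nc & 1 stores from vacc4x0123 ... */
  }
}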
