Home
last modified time | relevance | path

Searched refs: vacc5x0123 (Results 1 – 25 of 93) sorted by relevance

1234

/external/XNNPACK/src/f32-gemm/gen/
D6x8-minmax-wasmsimd-x86-loadsplat.c82 v128_t vacc5x0123 = vacc0x0123; in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat() local
110 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
127 vacc5x0123 = wasm_v128_bitselect(vmin, vacc5x0123, wasm_f32x4_lt(vacc5x0123, vmin)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
141 vacc5x0123 = wasm_v128_bitselect(vacc5x0123, vmax, wasm_f32x4_le(vacc5x0123, vmax)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
150 wasm_v128_store(c5, vacc5x0123); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
179 wasm_v128_store(c5, vacc5x0123); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
186 vacc5x0123 = vacc5x4567; in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
201 *((double*) c5) = wasm_f64x2_extract_lane(vacc5x0123, 0); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
208 vacc5x0123 = wasm_v32x4_shuffle(vacc5x0123, vacc5x0123, 2, 3, 2, 3); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
223 *c5 = wasm_f32x4_extract_lane(vacc5x0123, 0); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
D6x8-minmax-wasmsimd-arm-loadsplat.c84 v128_t vacc5x0123 = vacc0x0123; in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat() local
112 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
128 vacc5x0123 = wasm_f32x4_max(vacc5x0123, vmin); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
141 vacc5x0123 = wasm_f32x4_min(vacc5x0123, vmax); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
150 wasm_v128_store(c5, vacc5x0123); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
179 wasm_v128_store(c5, vacc5x0123); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
186 vacc5x0123 = vacc5x4567; in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
201 *((double*) c5) = wasm_f64x2_extract_lane(vacc5x0123, 0); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
208 vacc5x0123 = wasm_v32x4_shuffle(vacc5x0123, vacc5x0123, 2, 3, 2, 3); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
223 *c5 = wasm_f32x4_extract_lane(vacc5x0123, 0); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
D6x8-minmax-wasmsimd-x86-splat.c82 v128_t vacc5x0123 = vacc0x0123; in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat() local
116 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5c0, vb0123c0)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat()
138 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5c1, vb0123c1)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat()
160 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5c2, vb0123c2)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat()
182 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5c3, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat()
217 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat()
235 vacc5x0123 = wasm_v128_bitselect(vmin, vacc5x0123, wasm_f32x4_lt(vacc5x0123, vmin)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat()
249 vacc5x0123 = wasm_v128_bitselect(vacc5x0123, vmax, wasm_f32x4_le(vacc5x0123, vmax)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat()
258 wasm_v128_store(c5, vacc5x0123); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat()
287 wasm_v128_store(c5, vacc5x0123); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat()
[all …]
D6x8s4-minmax-wasmsimd-x86.c82 v128_t vacc5x0123 = vacc0x0123; in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86() local
110 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123c0)); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86()
133 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123c1)); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86()
156 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123c2)); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86()
179 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86()
215 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123)); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86()
233 vacc5x0123 = wasm_v128_bitselect(vmin, vacc5x0123, wasm_f32x4_lt(vacc5x0123, vmin)); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86()
247 vacc5x0123 = wasm_v128_bitselect(vacc5x0123, vmax, wasm_f32x4_le(vacc5x0123, vmax)); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86()
256 wasm_v128_store(c5, vacc5x0123); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86()
285 wasm_v128_store(c5, vacc5x0123); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86()
[all …]
D6x8-minmax-wasmsimd-arm-splat.c84 v128_t vacc5x0123 = vacc0x0123; in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat() local
118 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5c0, vb0123c0)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat()
140 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5c1, vb0123c1)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat()
162 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5c2, vb0123c2)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat()
184 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5c3, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat()
219 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat()
236 vacc5x0123 = wasm_f32x4_max(vacc5x0123, vmin); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat()
249 vacc5x0123 = wasm_f32x4_min(vacc5x0123, vmax); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat()
258 wasm_v128_store(c5, vacc5x0123); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat()
287 wasm_v128_store(c5, vacc5x0123); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat()
[all …]
D6x8s4-minmax-wasmsimd-arm.c84 v128_t vacc5x0123 = vacc0x0123; in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm() local
112 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123c0)); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm()
135 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123c1)); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm()
158 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123c2)); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm()
181 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm()
217 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123)); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm()
234 vacc5x0123 = wasm_f32x4_max(vacc5x0123, vmin); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm()
247 vacc5x0123 = wasm_f32x4_min(vacc5x0123, vmax); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm()
256 wasm_v128_store(c5, vacc5x0123); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm()
285 wasm_v128_store(c5, vacc5x0123); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm()
[all …]
D6x8-minmax-neonfma-lane-ld128.c83 float32x4_t vacc5x0123 = vacc0x0123; in xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld128() local
104 vacc5x0123 = vfmaq_lane_f32(vacc5x0123, vb0123c0, vget_low_f32(va5), 0); in xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld128()
120 vacc5x0123 = vfmaq_lane_f32(vacc5x0123, vb0123c1, vget_low_f32(va5), 1); in xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld128()
136 vacc5x0123 = vfmaq_lane_f32(vacc5x0123, vb0123c2, vget_high_f32(va5), 0); in xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld128()
152 vacc5x0123 = vfmaq_lane_f32(vacc5x0123, vb0123c3, vget_high_f32(va5), 1); in xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld128()
177 vacc5x0123 = vfmaq_f32(vacc5x0123, va5, vb0123); in xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld128()
194 vacc5x0123 = vminq_f32(vacc5x0123, vmax); in xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld128()
208 vacc5x0123 = vmaxq_f32(vacc5x0123, vmin); in xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld128()
217 vst1q_f32(c5, vacc5x0123); in xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld128()
247 vst1q_f32(c5, vacc5x0123); c5 += 4; in xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld128()
[all …]
D6x8s4-minmax-neon.c83 float32x4_t vacc5x0123 = vacc0x0123; in xnn_f32_gemm_minmax_ukernel_6x8s4__neon() local
104 vacc5x0123 = vmlaq_f32(vacc5x0123, va5, vb0123c0); in xnn_f32_gemm_minmax_ukernel_6x8s4__neon()
127 vacc5x0123 = vmlaq_f32(vacc5x0123, va5, vb0123c1); in xnn_f32_gemm_minmax_ukernel_6x8s4__neon()
150 vacc5x0123 = vmlaq_f32(vacc5x0123, va5, vb0123c2); in xnn_f32_gemm_minmax_ukernel_6x8s4__neon()
173 vacc5x0123 = vmlaq_f32(vacc5x0123, va5, vb0123c3); in xnn_f32_gemm_minmax_ukernel_6x8s4__neon()
202 vacc5x0123 = vmlaq_f32(vacc5x0123, va5, vb0123); in xnn_f32_gemm_minmax_ukernel_6x8s4__neon()
219 vacc5x0123 = vminq_f32(vacc5x0123, vmax); in xnn_f32_gemm_minmax_ukernel_6x8s4__neon()
233 vacc5x0123 = vmaxq_f32(vacc5x0123, vmin); in xnn_f32_gemm_minmax_ukernel_6x8s4__neon()
242 vst1q_f32(c5, vacc5x0123); in xnn_f32_gemm_minmax_ukernel_6x8s4__neon()
272 vst1q_f32(c5, vacc5x0123); c5 += 4; in xnn_f32_gemm_minmax_ukernel_6x8s4__neon()
[all …]
D6x8s4-minmax-neonfma.c83 float32x4_t vacc5x0123 = vacc0x0123; in xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma() local
104 vacc5x0123 = vfmaq_f32(vacc5x0123, va5, vb0123c0); in xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma()
127 vacc5x0123 = vfmaq_f32(vacc5x0123, va5, vb0123c1); in xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma()
150 vacc5x0123 = vfmaq_f32(vacc5x0123, va5, vb0123c2); in xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma()
173 vacc5x0123 = vfmaq_f32(vacc5x0123, va5, vb0123c3); in xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma()
202 vacc5x0123 = vfmaq_f32(vacc5x0123, va5, vb0123); in xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma()
219 vacc5x0123 = vminq_f32(vacc5x0123, vmax); in xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma()
233 vacc5x0123 = vmaxq_f32(vacc5x0123, vmin); in xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma()
242 vst1q_f32(c5, vacc5x0123); in xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma()
272 vst1q_f32(c5, vacc5x0123); c5 += 4; in xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma()
[all …]
D6x8-minmax-neon-lane-ld128.c83 float32x4_t vacc5x0123 = vacc0x0123; in xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128() local
104 vacc5x0123 = vmlaq_lane_f32(vacc5x0123, vb0123c0, vget_low_f32(va5), 0); in xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128()
120 vacc5x0123 = vmlaq_lane_f32(vacc5x0123, vb0123c1, vget_low_f32(va5), 1); in xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128()
136 vacc5x0123 = vmlaq_lane_f32(vacc5x0123, vb0123c2, vget_high_f32(va5), 0); in xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128()
152 vacc5x0123 = vmlaq_lane_f32(vacc5x0123, vb0123c3, vget_high_f32(va5), 1); in xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128()
177 vacc5x0123 = vmlaq_f32(vacc5x0123, va5, vb0123); in xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128()
194 vacc5x0123 = vminq_f32(vacc5x0123, vmax); in xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128()
208 vacc5x0123 = vmaxq_f32(vacc5x0123, vmin); in xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128()
217 vst1q_f32(c5, vacc5x0123); in xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128()
247 vst1q_f32(c5, vacc5x0123); c5 += 4; in xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128()
[all …]
/external/XNNPACK/src/f32-gemm/gen-inc/
D6x8inc-minmax-wasmsimd-x86-loadsplat.c84 v128_t vacc5x0123 = wasm_v128_load(acc + 40); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat() local
112 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
129 vacc5x0123 = wasm_v128_bitselect(vmin, vacc5x0123, wasm_f32x4_lt(vacc5x0123, vmin)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
143 vacc5x0123 = wasm_v128_bitselect(vacc5x0123, vmax, wasm_f32x4_le(vacc5x0123, vmax)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
152 wasm_v128_store(c5, vacc5x0123); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
181 wasm_v128_store(c5, vacc5x0123); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
188 vacc5x0123 = vacc5x4567; in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
203 *((double*) c5) = wasm_f64x2_extract_lane(vacc5x0123, 0); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
210 vacc5x0123 = wasm_v32x4_shuffle(vacc5x0123, vacc5x0123, 2, 3, 2, 3); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
225 *c5 = wasm_f32x4_extract_lane(vacc5x0123, 0); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
D6x8inc-minmax-wasmsimd-arm-loadsplat.c86 v128_t vacc5x0123 = wasm_v128_load(acc + 40); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat() local
114 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
130 vacc5x0123 = wasm_f32x4_max(vacc5x0123, vmin); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
143 vacc5x0123 = wasm_f32x4_min(vacc5x0123, vmax); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
152 wasm_v128_store(c5, vacc5x0123); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
181 wasm_v128_store(c5, vacc5x0123); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
188 vacc5x0123 = vacc5x4567; in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
203 *((double*) c5) = wasm_f64x2_extract_lane(vacc5x0123, 0); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
210 vacc5x0123 = wasm_v32x4_shuffle(vacc5x0123, vacc5x0123, 2, 3, 2, 3); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
225 *c5 = wasm_f32x4_extract_lane(vacc5x0123, 0); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
D6x8s4inc-minmax-wasmsimd-x86.c84 v128_t vacc5x0123 = wasm_v128_load(acc + 40); in xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86() local
112 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123c0)); in xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86()
135 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123c1)); in xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86()
158 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123c2)); in xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86()
181 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86()
217 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123)); in xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86()
235 vacc5x0123 = wasm_v128_bitselect(vmin, vacc5x0123, wasm_f32x4_lt(vacc5x0123, vmin)); in xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86()
249 vacc5x0123 = wasm_v128_bitselect(vacc5x0123, vmax, wasm_f32x4_le(vacc5x0123, vmax)); in xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86()
258 wasm_v128_store(c5, vacc5x0123); in xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86()
287 wasm_v128_store(c5, vacc5x0123); in xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_x86()
[all …]
D6x8inc-minmax-wasmsimd-x86-splat.c84 v128_t vacc5x0123 = wasm_v128_load(acc + 40); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat() local
118 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5c0, vb0123c0)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat()
140 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5c1, vb0123c1)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat()
162 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5c2, vb0123c2)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat()
184 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5c3, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat()
219 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat()
237 vacc5x0123 = wasm_v128_bitselect(vmin, vacc5x0123, wasm_f32x4_lt(vacc5x0123, vmin)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat()
251 vacc5x0123 = wasm_v128_bitselect(vacc5x0123, vmax, wasm_f32x4_le(vacc5x0123, vmax)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat()
260 wasm_v128_store(c5, vacc5x0123); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat()
289 wasm_v128_store(c5, vacc5x0123); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_splat()
[all …]
D6x8s4inc-minmax-wasmsimd-arm.c86 v128_t vacc5x0123 = wasm_v128_load(acc + 40); in xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm() local
114 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123c0)); in xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm()
137 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123c1)); in xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm()
160 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123c2)); in xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm()
183 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm()
219 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123)); in xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm()
236 vacc5x0123 = wasm_f32x4_max(vacc5x0123, vmin); in xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm()
249 vacc5x0123 = wasm_f32x4_min(vacc5x0123, vmax); in xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm()
258 wasm_v128_store(c5, vacc5x0123); in xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm()
287 wasm_v128_store(c5, vacc5x0123); in xnn_f32_gemminc_minmax_ukernel_6x8s4__wasmsimd_arm()
[all …]
D6x8inc-minmax-wasmsimd-arm-splat.c86 v128_t vacc5x0123 = wasm_v128_load(acc + 40); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat() local
120 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5c0, vb0123c0)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat()
142 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5c1, vb0123c1)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat()
164 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5c2, vb0123c2)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat()
186 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5c3, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat()
221 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat()
238 vacc5x0123 = wasm_f32x4_max(vacc5x0123, vmin); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat()
251 vacc5x0123 = wasm_f32x4_min(vacc5x0123, vmax); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat()
260 wasm_v128_store(c5, vacc5x0123); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat()
289 wasm_v128_store(c5, vacc5x0123); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_splat()
[all …]
D6x8inc-minmax-neon-lane-ld128.c85 float32x4_t vacc5x0123 = vld1q_f32(acc); acc += 4; in xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128() local
106 vacc5x0123 = vmlaq_lane_f32(vacc5x0123, vb0123c0, vget_low_f32(va5), 0); in xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128()
122 vacc5x0123 = vmlaq_lane_f32(vacc5x0123, vb0123c1, vget_low_f32(va5), 1); in xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128()
138 vacc5x0123 = vmlaq_lane_f32(vacc5x0123, vb0123c2, vget_high_f32(va5), 0); in xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128()
154 vacc5x0123 = vmlaq_lane_f32(vacc5x0123, vb0123c3, vget_high_f32(va5), 1); in xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128()
179 vacc5x0123 = vmlaq_f32(vacc5x0123, va5, vb0123); in xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128()
196 vacc5x0123 = vminq_f32(vacc5x0123, vmax); in xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128()
210 vacc5x0123 = vmaxq_f32(vacc5x0123, vmin); in xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128()
219 vst1q_f32(c5, vacc5x0123); in xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128()
249 vst1q_f32(c5, vacc5x0123); c5 += 4; in xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128()
[all …]
D6x8s4inc-minmax-neon.c85 float32x4_t vacc5x0123 = vld1q_f32(acc); acc += 4; in xnn_f32_gemminc_minmax_ukernel_6x8s4__neon() local
106 vacc5x0123 = vmlaq_f32(vacc5x0123, va5, vb0123c0); in xnn_f32_gemminc_minmax_ukernel_6x8s4__neon()
129 vacc5x0123 = vmlaq_f32(vacc5x0123, va5, vb0123c1); in xnn_f32_gemminc_minmax_ukernel_6x8s4__neon()
152 vacc5x0123 = vmlaq_f32(vacc5x0123, va5, vb0123c2); in xnn_f32_gemminc_minmax_ukernel_6x8s4__neon()
175 vacc5x0123 = vmlaq_f32(vacc5x0123, va5, vb0123c3); in xnn_f32_gemminc_minmax_ukernel_6x8s4__neon()
204 vacc5x0123 = vmlaq_f32(vacc5x0123, va5, vb0123); in xnn_f32_gemminc_minmax_ukernel_6x8s4__neon()
221 vacc5x0123 = vminq_f32(vacc5x0123, vmax); in xnn_f32_gemminc_minmax_ukernel_6x8s4__neon()
235 vacc5x0123 = vmaxq_f32(vacc5x0123, vmin); in xnn_f32_gemminc_minmax_ukernel_6x8s4__neon()
244 vst1q_f32(c5, vacc5x0123); in xnn_f32_gemminc_minmax_ukernel_6x8s4__neon()
274 vst1q_f32(c5, vacc5x0123); c5 += 4; in xnn_f32_gemminc_minmax_ukernel_6x8s4__neon()
[all …]
/external/XNNPACK/src/f32-igemm/gen/
D6x8s4-minmax-wasmsimd-x86.c76 v128_t vacc5x0123 = vacc0x0123; in xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86() local
138 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123c0)); in xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86()
161 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123c1)); in xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86()
184 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123c2)); in xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86()
207 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123c3)); in xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86()
248 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123)); in xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86()
262 vacc5x0123 = wasm_v128_bitselect(vmin, vacc5x0123, wasm_f32x4_lt(vacc5x0123, vmin)); in xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86()
276 vacc5x0123 = wasm_v128_bitselect(vacc5x0123, vmax, wasm_f32x4_le(vacc5x0123, vmax)); in xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86()
285 wasm_v128_store(c5, vacc5x0123); in xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86()
308 wasm_v128_store(c5, vacc5x0123); in xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86()
[all …]
D6x8-minmax-wasmsimd-x86-splat.c76 v128_t vacc5x0123 = vacc0x0123; in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat() local
144 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5c0, vb0123c0)); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat()
166 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5c1, vb0123c1)); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat()
188 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5c2, vb0123c2)); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat()
210 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5c3, vb0123c3)); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat()
250 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123)); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat()
264 vacc5x0123 = wasm_v128_bitselect(vmin, vacc5x0123, wasm_f32x4_lt(vacc5x0123, vmin)); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat()
278 vacc5x0123 = wasm_v128_bitselect(vacc5x0123, vmax, wasm_f32x4_le(vacc5x0123, vmax)); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat()
287 wasm_v128_store(c5, vacc5x0123); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat()
310 wasm_v128_store(c5, vacc5x0123); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat()
[all …]
D6x8-minmax-wasmsimd-x86-loadsplat.c76 v128_t vacc5x0123 = vacc0x0123; in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat() local
143 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123)); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
156 vacc5x0123 = wasm_v128_bitselect(vmin, vacc5x0123, wasm_f32x4_lt(vacc5x0123, vmin)); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
170 vacc5x0123 = wasm_v128_bitselect(vacc5x0123, vmax, wasm_f32x4_le(vacc5x0123, vmax)); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
179 wasm_v128_store(c5, vacc5x0123); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
202 wasm_v128_store(c5, vacc5x0123); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
209 vacc5x0123 = vacc5x4567; in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
224 *((double*) c5) = wasm_f64x2_extract_lane(vacc5x0123, 0); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
231 vacc5x0123 = wasm_v32x4_shuffle(vacc5x0123, vacc5x0123, 2, 3, 2, 3); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
246 *c5 = wasm_f32x4_extract_lane(vacc5x0123, 0); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
D6x8s4-minmax-wasmsimd-arm.c78 v128_t vacc5x0123 = vacc0x0123; in xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm() local
140 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123c0)); in xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm()
163 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123c1)); in xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm()
186 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123c2)); in xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm()
209 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123c3)); in xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm()
250 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123)); in xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm()
263 vacc5x0123 = wasm_f32x4_max(vacc5x0123, vmin); in xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm()
276 vacc5x0123 = wasm_f32x4_min(vacc5x0123, vmax); in xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm()
285 wasm_v128_store(c5, vacc5x0123); in xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm()
308 wasm_v128_store(c5, vacc5x0123); in xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm()
[all …]
D6x8-minmax-wasmsimd-arm-loadsplat.c78 v128_t vacc5x0123 = vacc0x0123; in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat() local
145 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123)); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
157 vacc5x0123 = wasm_f32x4_max(vacc5x0123, vmin); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
170 vacc5x0123 = wasm_f32x4_min(vacc5x0123, vmax); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
179 wasm_v128_store(c5, vacc5x0123); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
202 wasm_v128_store(c5, vacc5x0123); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
209 vacc5x0123 = vacc5x4567; in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
224 *((double*) c5) = wasm_f64x2_extract_lane(vacc5x0123, 0); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
231 vacc5x0123 = wasm_v32x4_shuffle(vacc5x0123, vacc5x0123, 2, 3, 2, 3); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
246 *c5 = wasm_f32x4_extract_lane(vacc5x0123, 0); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
D6x8-minmax-wasmsimd-arm-splat.c78 v128_t vacc5x0123 = vacc0x0123; in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat() local
146 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5c0, vb0123c0)); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat()
168 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5c1, vb0123c1)); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat()
190 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5c2, vb0123c2)); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat()
212 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5c3, vb0123c3)); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat()
252 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123)); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat()
265 vacc5x0123 = wasm_f32x4_max(vacc5x0123, vmin); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat()
278 vacc5x0123 = wasm_f32x4_min(vacc5x0123, vmax); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat()
287 wasm_v128_store(c5, vacc5x0123); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat()
310 wasm_v128_store(c5, vacc5x0123); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat()
[all …]
D6x8-minmax-neonfma-lane-ld128.c77 float32x4_t vacc5x0123 = vacc0x0123; in xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128() local
132 vacc5x0123 = vfmaq_lane_f32(vacc5x0123, vb0123c0, vget_low_f32(va5), 0); in xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128()
148 vacc5x0123 = vfmaq_lane_f32(vacc5x0123, vb0123c1, vget_low_f32(va5), 1); in xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128()
164 vacc5x0123 = vfmaq_lane_f32(vacc5x0123, vb0123c2, vget_high_f32(va5), 0); in xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128()
180 vacc5x0123 = vfmaq_lane_f32(vacc5x0123, vb0123c3, vget_high_f32(va5), 1); in xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128()
205 vacc5x0123 = vfmaq_f32(vacc5x0123, va5, vb0123); in xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128()
226 vacc5x0123 = vminq_f32(vacc5x0123, vmax); in xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128()
240 vacc5x0123 = vmaxq_f32(vacc5x0123, vmin); in xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128()
249 vst1q_f32(c5, vacc5x0123); in xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128()
272 vst1q_f32(c5, vacc5x0123); c5 += 4; in xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128()
[all …]

1234