
Searched refs:vacc0x4567 (results 1–25 of 411), sorted by relevance.


/external/XNNPACK/src/qs8-igemm/gen/
1x8-minmax-neon-mlal-lane.c (in xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane()):
  47:  int32x4_t vacc0x4567 = vld1q_s32(w); w = (const void*) ((uintptr_t) w + 4 * sizeof(int32_t));  [local]
  66:  vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa0), 0);
  71:  vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa0), 1);
  76:  vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa0), 2);
  81:  vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c3), vget_low_s16(vxa0), 3);
  87:  vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c4), vget_high_s16(vxa0), 0);
  92:  vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c5), vget_high_s16(vxa0), 1);
  97:  vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c6), vget_high_s16(vxa0), 2);
  102: vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c7), vget_high_s16(vxa0), 3);
  114: vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa0), 0);
  [all …]
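The mlal-lane hits above all follow one pattern: sign-extend 8-bit activations and weights to 16 bits, then widen-multiply-accumulate one activation lane at a time into 32-bit accumulators. A minimal sketch of a single step (qs8_mlal_lane_step is a hypothetical helper for illustration, not an XNNPACK function):

    #include <arm_neon.h>

    // One k-step for one weight column group: widen to 16 bits, then multiply
    // by activation lane 0 and widen-accumulate into the int32 accumulators.
    static void qs8_mlal_lane_step(int32x4_t *vacc0x0123, int32x4_t *vacc0x4567,
                                   const int8_t *a, const int8_t *b) {
      const int16x8_t vxa0 = vmovl_s8(vld1_s8(a));           // activations, widened
      const int16x8_t vxb01234567c0 = vmovl_s8(vld1_s8(b));  // weights, widened
      // Low half of the weight row feeds columns 0-3, high half columns 4-7.
      *vacc0x0123 = vmlal_lane_s16(*vacc0x0123, vget_low_s16(vxb01234567c0),
                                   vget_low_s16(vxa0), 0);
      *vacc0x4567 = vmlal_lane_s16(*vacc0x4567, vget_high_s16(vxb01234567c0),
                                   vget_low_s16(vxa0), 0);
    }

The kernel unrolls this over lane indices 0-3 of both activation halves, which is why the hits cycle through vget_low_s16(vxa0) and vget_high_s16(vxa0) with lanes 0 through 3.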
1x8-minmax-neon-mull-addw-dup.c (in xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup()):
  47:  int32x4_t vacc0x4567 = vld1q_s32(w); w = (const void*) ((uintptr_t) w + 4 * sizeof(int32_t));  [local]
  65:  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0));
  70:  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));
  75:  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));
  80:  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c3));
  85:  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c4));
  90:  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c5));
  95:  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c6));
  100: vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c7));
  111: vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0));
  [all …]
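The mull-addw-dup variant reaches the same int32 accumulators differently: duplicate one activation byte across a vector, do an 8-bit by 8-bit to 16-bit multiply, then widen-add both halves of the product. A sketch under the same caveat (hypothetical helper, not XNNPACK API):

    #include <arm_neon.h>

    static void qs8_mull_addw_dup_step(int32x4_t *vacc0x0123, int32x4_t *vacc0x4567,
                                       int8_t a0, const int8_t *b) {
      // 8-bit x 8-bit -> 16-bit product of 8 weights against one activation.
      const int16x8_t vprod0x01234567c0 = vmull_s8(vld1_s8(b), vdup_n_s8(a0));
      // Widening adds: low half into columns 0-3, high half into columns 4-7.
      *vacc0x0123 = vaddw_s16(*vacc0x0123, vget_low_s16(vprod0x01234567c0));
      *vacc0x4567 = vaddw_s16(*vacc0x4567, vget_high_s16(vprod0x01234567c0));
    }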
2x8-minmax-neon-mull-addw-dup.c (in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup()):
  51:  int32x4_t vacc0x4567 = vld1q_s32(w); w = (const void*) ((uintptr_t) w + 4 * sizeof(int32_t));  [local]
  53:  int32x4_t vacc1x4567 = vacc0x4567;
  76:  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0));
  84:  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));
  92:  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));
  100: vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c3));
  108: vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c4));
  116: vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c5));
  124: vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c6));
  132: vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c7));
  [all …]
/external/XNNPACK/src/qs8-gemm/gen/
1x8-minmax-neon-mlal-lane.c (in xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane()):
  44:  int32x4_t vacc0x4567 = vld1q_s32(w); w = (const void*) ((uintptr_t) w + 4 * sizeof(int32_t));  [local]
  55:  vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa0), 0);
  60:  vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa0), 1);
  65:  vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa0), 2);
  70:  vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c3), vget_low_s16(vxa0), 3);
  76:  vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c4), vget_high_s16(vxa0), 0);
  81:  vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c5), vget_high_s16(vxa0), 1);
  86:  vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c6), vget_high_s16(vxa0), 2);
  91:  vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c7), vget_high_s16(vxa0), 3);
  103: vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa0), 0);
  [all …]
1x8-minmax-neon-mull-addw-dup.c (in xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup()):
  44:  int32x4_t vacc0x4567 = vld1q_s32(w); w = (const void*) ((uintptr_t) w + 4 * sizeof(int32_t));  [local]
  54:  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0));
  59:  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));
  64:  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));
  69:  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c3));
  74:  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c4));
  79:  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c5));
  84:  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c6));
  89:  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c7));
  100: vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0));
  [all …]
2x8-minmax-neon-mull-addw-dup.c (in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup()):
  50:  int32x4_t vacc0x4567 = vld1q_s32(w); w = (const void*) ((uintptr_t) w + 4 * sizeof(int32_t));  [local]
  52:  int32x4_t vacc1x4567 = vacc0x4567;
  63:  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0));
  71:  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));
  79:  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));
  87:  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c3));
  95:  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c4));
  103: vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c5));
  111: vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c6));
  119: vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c7));
  [all …]
2x8-minmax-neon-mlal-lane.c (in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane()):
  50:  int32x4_t vacc0x4567 = vld1q_s32(w); w = (const void*) ((uintptr_t) w + 4 * sizeof(int32_t));  [local]
  52:  int32x4_t vacc1x4567 = vacc0x4567;
  65:  vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa0), 0);
  72:  vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa0), 1);
  79:  vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa0), 2);
  86:  vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c3), vget_low_s16(vxa0), 3);
  94:  vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c4), vget_high_s16(vxa0), 0);
  101: vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c5), vget_high_s16(vxa0), 1);
  108: vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c6), vget_high_s16(vxa0), 2);
  115: vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c7), vget_high_s16(vxa0), 3);
  [all …]
/external/XNNPACK/src/f32-gemm/gen/
1x8s4-minmax-wasmsimd-x86.c (in xnn_f32_gemm_minmax_ukernel_1x8s4__wasmsimd_x86()):
  43:  v128_t vacc0x4567 = wasm_v128_load(w + 4);  [local]
  56:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c0));
  64:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c1));
  72:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c2));
  80:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c3));
  96:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567));
  104: vacc0x4567 = wasm_v128_bitselect(vmin, vacc0x4567, wasm_f32x4_lt(vacc0x4567, vmin));
  108: vacc0x4567 = wasm_v128_bitselect(vacc0x4567, vmax, wasm_f32x4_le(vacc0x4567, vmax));
  112: wasm_v128_store(c0 + 4, vacc0x4567);
  122: vacc0x0123 = vacc0x4567;
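On the wasmsimd-x86 variants the output clamp is emulated with a compare plus wasm_v128_bitselect, which lowers better to SSE than the native wasm_f32x4_min/max used by the -arm variants below. A standalone sketch of just that clamp (clamp_f32x4_x86 is a hypothetical name, compiled for a wasm32 target with SIMD enabled):

    #include <wasm_simd128.h>

    static v128_t clamp_f32x4_x86(v128_t vacc, v128_t vmin, v128_t vmax) {
      // Where vacc < vmin, select vmin; elsewhere keep vacc.
      vacc = wasm_v128_bitselect(vmin, vacc, wasm_f32x4_lt(vacc, vmin));
      // Where vacc <= vmax, keep vacc; elsewhere select vmax.
      vacc = wasm_v128_bitselect(vacc, vmax, wasm_f32x4_le(vacc, vmax));
      return vacc;
    }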
1x8-minmax-wasmsimd-x86-splat.c (in xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat()):
  43:  v128_t vacc0x4567 = wasm_v128_load(w + 4);  [local]
  57:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0c0, vb4567c0));
  64:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0c1, vb4567c1));
  71:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0c2, vb4567c2));
  78:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0c3, vb4567c3));
  93:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567));
  101: vacc0x4567 = wasm_v128_bitselect(vmin, vacc0x4567, wasm_f32x4_lt(vacc0x4567, vmin));
  105: vacc0x4567 = wasm_v128_bitselect(vacc0x4567, vmax, wasm_f32x4_le(vacc0x4567, vmax));
  109: wasm_v128_store(c0 + 4, vacc0x4567);
  119: vacc0x0123 = vacc0x4567;
1x8s4-minmax-neon.c (in xnn_f32_gemm_minmax_ukernel_1x8s4__neon()):
  44:  float32x4_t vacc0x4567 = vld1q_f32(w); w += 4;  [local]
  55:  vacc0x4567 = vmlaq_f32(vacc0x4567, va0, vb4567c0);
  63:  vacc0x4567 = vmlaq_f32(vacc0x4567, va0, vb4567c1);
  71:  vacc0x4567 = vmlaq_f32(vacc0x4567, va0, vb4567c2);
  79:  vacc0x4567 = vmlaq_f32(vacc0x4567, va0, vb4567c3);
  93:  vacc0x4567 = vmlaq_f32(vacc0x4567, va0, vb4567);
  100: vacc0x4567 = vminq_f32(vacc0x4567, vmax);
  104: vacc0x4567 = vmaxq_f32(vacc0x4567, vmin);
  108: vst1q_f32(c0 + 4, vacc0x4567);
  119: vacc0x0123 = vacc0x4567;
1x8s4-minmax-neonfma.c (in xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma()):
  44:  float32x4_t vacc0x4567 = vld1q_f32(w); w += 4;  [local]
  55:  vacc0x4567 = vfmaq_f32(vacc0x4567, va0, vb4567c0);
  63:  vacc0x4567 = vfmaq_f32(vacc0x4567, va0, vb4567c1);
  71:  vacc0x4567 = vfmaq_f32(vacc0x4567, va0, vb4567c2);
  79:  vacc0x4567 = vfmaq_f32(vacc0x4567, va0, vb4567c3);
  93:  vacc0x4567 = vfmaq_f32(vacc0x4567, va0, vb4567);
  100: vacc0x4567 = vminq_f32(vacc0x4567, vmax);
  104: vacc0x4567 = vmaxq_f32(vacc0x4567, vmin);
  108: vst1q_f32(c0 + 4, vacc0x4567);
  119: vacc0x0123 = vacc0x4567;
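The only difference between the -neon and -neonfma hits is the accumulate intrinsic: vmlaq_f32 is a separate multiply and add, vfmaq_f32 is a fused multiply-add with a single rounding. A sketch of one 8-wide accumulation step, with the dispatch shown as an assumed compile-time switch (f32_acc_step is a hypothetical helper):

    #include <arm_neon.h>

    static void f32_acc_step(float32x4_t *vacc0x0123, float32x4_t *vacc0x4567,
                             float32x4_t va0, const float *w) {
      const float32x4_t vb0123 = vld1q_f32(w);
      const float32x4_t vb4567 = vld1q_f32(w + 4);
    #if defined(__ARM_FEATURE_FMA)
      *vacc0x0123 = vfmaq_f32(*vacc0x0123, va0, vb0123);  // fused, one rounding
      *vacc0x4567 = vfmaq_f32(*vacc0x4567, va0, vb4567);
    #else
      *vacc0x0123 = vmlaq_f32(*vacc0x0123, va0, vb0123);  // multiply, then add
      *vacc0x4567 = vmlaq_f32(*vacc0x4567, va0, vb4567);
    #endif
    }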
1x8s4-minmax-sse.c (in xnn_f32_gemm_minmax_ukernel_1x8s4__sse()):
  43:  __m128 vacc0x4567 = _mm_load_ps(w + 4);  [local]
  56:  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0, vb4567c0));
  64:  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0, vb4567c1));
  72:  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0, vb4567c2));
  80:  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0, vb4567c3));
  96:  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0, vb4567));
  104: vacc0x4567 = _mm_min_ps(vacc0x4567, vmax);
  108: vacc0x4567 = _mm_max_ps(vacc0x4567, vmin);
  112: _mm_storeu_ps(c0 + 4, vacc0x4567);
  122: vacc0x0123 = vacc0x4567;
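The SSE version is structurally identical: multiply-add into the upper four columns, clamp with _mm_min_ps/_mm_max_ps, then store unaligned. A sketch of the end of the row computation (sse_finish_hi is a hypothetical helper):

    #include <xmmintrin.h>

    static void sse_finish_hi(__m128 vacc0x4567, float *c0,
                              __m128 vmin, __m128 vmax) {
      vacc0x4567 = _mm_min_ps(vacc0x4567, vmax);  // clamp from above
      vacc0x4567 = _mm_max_ps(vacc0x4567, vmin);  // clamp from below
      _mm_storeu_ps(c0 + 4, vacc0x4567);          // columns 4-7; output rows may be unaligned
    }

The recurring "vacc0x0123 = vacc0x4567;" hits are the column-remainder path: when fewer than eight output columns remain, the kernel stores the low half and shifts the high half down to reuse the narrower store sequence.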
1x8s4-minmax-wasmsimd-arm.c (in xnn_f32_gemm_minmax_ukernel_1x8s4__wasmsimd_arm()):
  45:  v128_t vacc0x4567 = wasm_v128_load(w + 4);  [local]
  58:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c0));
  66:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c1));
  74:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c2));
  82:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c3));
  98:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567));
  105: vacc0x4567 = wasm_f32x4_max(vacc0x4567, vmin);
  108: vacc0x4567 = wasm_f32x4_min(vacc0x4567, vmax);
  112: wasm_v128_store(c0 + 4, vacc0x4567);
  122: vacc0x0123 = vacc0x4567;
1x8-minmax-wasmsimd-arm-splat.c (in xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat()):
  45:  v128_t vacc0x4567 = wasm_v128_load(w + 4);  [local]
  59:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0c0, vb4567c0));
  66:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0c1, vb4567c1));
  73:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0c2, vb4567c2));
  80:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0c3, vb4567c3));
  95:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567));
  102: vacc0x4567 = wasm_f32x4_max(vacc0x4567, vmin);
  105: vacc0x4567 = wasm_f32x4_min(vacc0x4567, vmax);
  109: wasm_v128_store(c0 + 4, vacc0x4567);
  119: vacc0x0123 = vacc0x4567;
1x8-minmax-sse-dup.c (in xnn_f32_gemm_minmax_ukernel_1x8__sse_dup()):
  43:  __m128 vacc0x4567 = _mm_load_ps(w + 4);  [local]
  58:  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c0000, vb4567c0));
  66:  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c1111, vb4567c1));
  74:  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c2222, vb4567c2));
  82:  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c3333, vb4567c3));
  97:  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0, vb4567));
  105: vacc0x4567 = _mm_min_ps(vacc0x4567, vmax);
  109: vacc0x4567 = _mm_max_ps(vacc0x4567, vmin);
  113: _mm_storeu_ps(c0 + 4, vacc0x4567);
  123: vacc0x0123 = vacc0x4567;
/external/XNNPACK/src/f32-gemm/gen-inc/
1x8s4inc-minmax-wasmsimd-x86.c (in xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_x86()):
  45:  v128_t vacc0x4567 = wasm_v128_load(acc + 4);  [local]
  58:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c0));
  66:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c1));
  74:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c2));
  82:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c3));
  98:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567));
  106: vacc0x4567 = wasm_v128_bitselect(vmin, vacc0x4567, wasm_f32x4_lt(vacc0x4567, vmin));
  110: vacc0x4567 = wasm_v128_bitselect(vacc0x4567, vmax, wasm_f32x4_le(vacc0x4567, vmax));
  114: wasm_v128_store(c0 + 4, vacc0x4567);
  124: vacc0x0123 = vacc0x4567;
1x8inc-minmax-wasmsimd-x86-splat.c (in xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat()):
  45:  v128_t vacc0x4567 = wasm_v128_load(acc + 4);  [local]
  59:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0c0, vb4567c0));
  66:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0c1, vb4567c1));
  73:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0c2, vb4567c2));
  80:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0c3, vb4567c3));
  95:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567));
  103: vacc0x4567 = wasm_v128_bitselect(vmin, vacc0x4567, wasm_f32x4_lt(vacc0x4567, vmin));
  107: vacc0x4567 = wasm_v128_bitselect(vacc0x4567, vmax, wasm_f32x4_le(vacc0x4567, vmax));
  111: wasm_v128_store(c0 + 4, vacc0x4567);
  121: vacc0x0123 = vacc0x4567;
1x8s4inc-minmax-wasmsimd-arm.c (in xnn_f32_gemminc_minmax_ukernel_1x8s4__wasmsimd_arm()):
  47:  v128_t vacc0x4567 = wasm_v128_load(acc + 4);  [local]
  60:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c0));
  68:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c1));
  76:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c2));
  84:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c3));
  100: vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567));
  107: vacc0x4567 = wasm_f32x4_max(vacc0x4567, vmin);
  110: vacc0x4567 = wasm_f32x4_min(vacc0x4567, vmax);
  114: wasm_v128_store(c0 + 4, vacc0x4567);
  124: vacc0x0123 = vacc0x4567;
1x8s4inc-minmax-neon.c (in xnn_f32_gemminc_minmax_ukernel_1x8s4__neon()):
  46:  float32x4_t vacc0x4567 = vld1q_f32(acc); acc += 4;  [local]
  57:  vacc0x4567 = vmlaq_f32(vacc0x4567, va0, vb4567c0);
  65:  vacc0x4567 = vmlaq_f32(vacc0x4567, va0, vb4567c1);
  73:  vacc0x4567 = vmlaq_f32(vacc0x4567, va0, vb4567c2);
  81:  vacc0x4567 = vmlaq_f32(vacc0x4567, va0, vb4567c3);
  95:  vacc0x4567 = vmlaq_f32(vacc0x4567, va0, vb4567);
  102: vacc0x4567 = vminq_f32(vacc0x4567, vmax);
  106: vacc0x4567 = vmaxq_f32(vacc0x4567, vmin);
  110: vst1q_f32(c0 + 4, vacc0x4567);
  121: vacc0x0123 = vacc0x4567;
1x8s4inc-minmax-neonfma.c (in xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma()):
  46:  float32x4_t vacc0x4567 = vld1q_f32(acc); acc += 4;  [local]
  57:  vacc0x4567 = vfmaq_f32(vacc0x4567, va0, vb4567c0);
  65:  vacc0x4567 = vfmaq_f32(vacc0x4567, va0, vb4567c1);
  73:  vacc0x4567 = vfmaq_f32(vacc0x4567, va0, vb4567c2);
  81:  vacc0x4567 = vfmaq_f32(vacc0x4567, va0, vb4567c3);
  95:  vacc0x4567 = vfmaq_f32(vacc0x4567, va0, vb4567);
  102: vacc0x4567 = vminq_f32(vacc0x4567, vmax);
  106: vacc0x4567 = vmaxq_f32(vacc0x4567, vmin);
  110: vst1q_f32(c0 + 4, vacc0x4567);
  121: vacc0x0123 = vacc0x4567;
1x8inc-minmax-wasmsimd-arm-splat.c (in xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat()):
  47:  v128_t vacc0x4567 = wasm_v128_load(acc + 4);  [local]
  61:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0c0, vb4567c0));
  68:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0c1, vb4567c1));
  75:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0c2, vb4567c2));
  82:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0c3, vb4567c3));
  97:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567));
  104: vacc0x4567 = wasm_f32x4_max(vacc0x4567, vmin);
  107: vacc0x4567 = wasm_f32x4_min(vacc0x4567, vmax);
  111: wasm_v128_store(c0 + 4, vacc0x4567);
  121: vacc0x0123 = vacc0x4567;
1x8s4inc-minmax-sse.c (in xnn_f32_gemminc_minmax_ukernel_1x8s4__sse()):
  45:  __m128 vacc0x4567 = _mm_load_ps(acc + 4);  [local]
  58:  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0, vb4567c0));
  66:  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0, vb4567c1));
  74:  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0, vb4567c2));
  82:  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0, vb4567c3));
  98:  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0, vb4567));
  106: vacc0x4567 = _mm_min_ps(vacc0x4567, vmax);
  110: vacc0x4567 = _mm_max_ps(vacc0x4567, vmin);
  114: _mm_storeu_ps(c0 + 4, vacc0x4567);
  124: vacc0x0123 = vacc0x4567;
1x8inc-minmax-sse-dup.c (in xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup()):
  45:  __m128 vacc0x4567 = _mm_load_ps(acc + 4);  [local]
  60:  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c0000, vb4567c0));
  68:  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c1111, vb4567c1));
  76:  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c2222, vb4567c2));
  84:  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0c3333, vb4567c3));
  99:  vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0, vb4567));
  107: vacc0x4567 = _mm_min_ps(vacc0x4567, vmax);
  111: vacc0x4567 = _mm_max_ps(vacc0x4567, vmin);
  115: _mm_storeu_ps(c0 + 4, vacc0x4567);
  125: vacc0x0123 = vacc0x4567;
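The gen-inc (gemminc) kernels above differ from the gen ones only in how the accumulators start: instead of being seeded from values packed ahead of the weights in w, they resume from a caller-supplied partial-sum buffer acc (hence vld1q_f32(acc), wasm_v128_load(acc + 4), and _mm_load_ps(acc + 4) in the declarations). A minimal sketch of that assumed entry difference (load_initial_acc is a hypothetical helper):

    #include <arm_neon.h>

    // gen:     accumulators seeded from bias values packed with the weights.
    // gen-inc: accumulators seeded from a partial-sum buffer from an earlier pass.
    static void load_initial_acc(const float *acc,
                                 float32x4_t *vacc0x0123, float32x4_t *vacc0x4567) {
      *vacc0x0123 = vld1q_f32(acc);
      *vacc0x4567 = vld1q_f32(acc + 4);
    }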
/external/XNNPACK/src/f32-igemm/gen/
1x8-minmax-wasmsimd-x86-splat.c (in xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat()):
  47:  v128_t vacc0x4567 = wasm_v128_load(w + 4);  [local]
  70:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0c0, vb4567c0));
  77:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0c1, vb4567c1));
  84:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0c2, vb4567c2));
  91:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0c3, vb4567c3));
  106: vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567));
  115: vacc0x4567 = wasm_v128_bitselect(vmin, vacc0x4567, wasm_f32x4_lt(vacc0x4567, vmin));
  119: vacc0x4567 = wasm_v128_bitselect(vacc0x4567, vmax, wasm_f32x4_le(vacc0x4567, vmax));
  123: wasm_v128_store(c0 + 4, vacc0x4567);
  132: vacc0x0123 = vacc0x4567;
1x8s4-minmax-wasmsimd-x86.c (in xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86()):
  47:  v128_t vacc0x4567 = wasm_v128_load(w + 4);  [local]
  69:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c0));
  77:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c1));
  85:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c2));
  93:  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c3));
  109: vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567));
  118: vacc0x4567 = wasm_v128_bitselect(vmin, vacc0x4567, wasm_f32x4_lt(vacc0x4567, vmin));
  122: vacc0x4567 = wasm_v128_bitselect(vacc0x4567, vmax, wasm_f32x4_le(vacc0x4567, vmax));
  126: wasm_v128_store(c0 + 4, vacc0x4567);
  135: vacc0x0123 = vacc0x4567;
