Home
last modified time | relevance | path

Searched refs:vacc0123p0 (Results 1 – 25 of 69) sorted by relevance

123

/external/XNNPACK/src/f32-dwconv/gen/
Dup4x25-minmax-wasmsimd-x86.c165 v128_t vacc0123p0 = wasm_v128_load(w); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86() local
172 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86()
178 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86()
184 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi2x0123, vk2x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86()
190 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86()
196 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi4x0123, vk4x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86()
202 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86()
208 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86()
214 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86()
220 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi8x0123, vk8x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_x86()
[all …]
Dup4x25-minmax-sse.c165 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse() local
172 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
178 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
184 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi2x0123, vk2x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
190 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
196 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
202 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
208 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
214 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
220 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__sse()
[all …]
Dup4x25-wasmsimd.c163 v128_t vacc0123p0 = wasm_v128_load(w); in xnn_f32_dwconv_ukernel_up4x25__wasmsimd() local
170 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up4x25__wasmsimd()
176 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_ukernel_up4x25__wasmsimd()
182 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi2x0123, vk2x0123)); in xnn_f32_dwconv_ukernel_up4x25__wasmsimd()
188 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_ukernel_up4x25__wasmsimd()
194 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up4x25__wasmsimd()
200 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_ukernel_up4x25__wasmsimd()
206 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_ukernel_up4x25__wasmsimd()
212 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_ukernel_up4x25__wasmsimd()
218 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi8x0123, vk8x0123)); in xnn_f32_dwconv_ukernel_up4x25__wasmsimd()
[all …]
Dup4x25-minmax-neonfma.c166 float32x4_t vacc0123p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma() local
171 vacc0123p0 = vfmaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma()
175 vacc0123p0 = vfmaq_f32(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma()
179 vacc0123p0 = vfmaq_f32(vacc0123p0, vi2x0123, vk2x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma()
183 vacc0123p0 = vfmaq_f32(vacc0123p0, vi3x0123, vk3x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma()
187 vacc0123p0 = vfmaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma()
191 vacc0123p0 = vfmaq_f32(vacc0123p0, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma()
195 vacc0123p0 = vfmaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma()
199 vacc0123p0 = vfmaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma()
203 vacc0123p0 = vfmaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma()
[all …]
Dup4x25-minmax-wasmsimd-arm.c165 v128_t vacc0123p0 = wasm_v128_load(w); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm() local
172 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm()
178 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm()
184 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi2x0123, vk2x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm()
190 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm()
196 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi4x0123, vk4x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm()
202 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm()
208 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm()
214 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm()
220 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi8x0123, vk8x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x25__wasmsimd_arm()
[all …]
Dup4x25-minmax-neon.c166 float32x4_t vacc0123p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x25__neon() local
171 vacc0123p0 = vmlaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon()
175 vacc0123p0 = vmlaq_f32(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon()
179 vacc0123p0 = vmlaq_f32(vacc0123p0, vi2x0123, vk2x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon()
183 vacc0123p0 = vmlaq_f32(vacc0123p0, vi3x0123, vk3x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon()
187 vacc0123p0 = vmlaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon()
191 vacc0123p0 = vmlaq_f32(vacc0123p0, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon()
195 vacc0123p0 = vmlaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon()
199 vacc0123p0 = vmlaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon()
203 vacc0123p0 = vmlaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_minmax_ukernel_up4x25__neon()
[all …]
Dup8x25-minmax-wasmsimd-x86.c165 v128_t vacc0123p0 = wasm_v128_load(w); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86() local
175 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86()
184 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86()
193 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi2x0123, vk2x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86()
202 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86()
211 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi4x0123, vk4x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86()
220 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86()
229 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86()
238 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86()
247 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi8x0123, vk8x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_x86()
[all …]
Dup8x25-minmax-sse.c165 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse() local
175 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
184 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
193 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi2x0123, vk2x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
202 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
211 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
220 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
229 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
238 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
247 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__sse()
[all …]
Dup8x25-minmax-neon.c166 float32x4_t vacc0123p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neon() local
174 vacc0123p0 = vmlaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon()
181 vacc0123p0 = vmlaq_f32(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon()
188 vacc0123p0 = vmlaq_f32(vacc0123p0, vi2x0123, vk2x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon()
195 vacc0123p0 = vmlaq_f32(vacc0123p0, vi3x0123, vk3x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon()
202 vacc0123p0 = vmlaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon()
209 vacc0123p0 = vmlaq_f32(vacc0123p0, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon()
216 vacc0123p0 = vmlaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon()
223 vacc0123p0 = vmlaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon()
230 vacc0123p0 = vmlaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon()
[all …]
Dup8x25-minmax-neonfma.c166 float32x4_t vacc0123p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma() local
174 vacc0123p0 = vfmaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma()
181 vacc0123p0 = vfmaq_f32(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma()
188 vacc0123p0 = vfmaq_f32(vacc0123p0, vi2x0123, vk2x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma()
195 vacc0123p0 = vfmaq_f32(vacc0123p0, vi3x0123, vk3x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma()
202 vacc0123p0 = vfmaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma()
209 vacc0123p0 = vfmaq_f32(vacc0123p0, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma()
216 vacc0123p0 = vfmaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma()
223 vacc0123p0 = vfmaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma()
230 vacc0123p0 = vfmaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma()
[all …]
Dup8x25-wasmsimd.c163 v128_t vacc0123p0 = wasm_v128_load(w); in xnn_f32_dwconv_ukernel_up8x25__wasmsimd() local
173 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up8x25__wasmsimd()
182 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_ukernel_up8x25__wasmsimd()
191 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi2x0123, vk2x0123)); in xnn_f32_dwconv_ukernel_up8x25__wasmsimd()
200 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_ukernel_up8x25__wasmsimd()
209 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up8x25__wasmsimd()
218 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_ukernel_up8x25__wasmsimd()
227 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_ukernel_up8x25__wasmsimd()
236 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_ukernel_up8x25__wasmsimd()
245 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi8x0123, vk8x0123)); in xnn_f32_dwconv_ukernel_up8x25__wasmsimd()
[all …]
Dup8x25-minmax-wasmsimd-arm.c165 v128_t vacc0123p0 = wasm_v128_load(w); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm() local
175 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm()
184 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm()
193 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi2x0123, vk2x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm()
202 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm()
211 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi4x0123, vk4x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm()
220 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm()
229 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm()
238 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm()
247 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi8x0123, vk8x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x25__wasmsimd_arm()
[all …]
Dup8x9-minmax-wasmsimd-x86.c85 v128_t vacc0123p0 = wasm_v128_load(w); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86() local
95 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86()
104 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86()
113 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi2x0123, vk2x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86()
122 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86()
131 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi4x0123, vk4x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86()
140 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86()
149 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86()
158 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86()
167 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi8x0123, vk8x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86()
[all …]
Dup8x9-minmax-neonfma.c86 float32x4_t vacc0123p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma() local
94 vacc0123p0 = vfmaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma()
101 vacc0123p0 = vfmaq_f32(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma()
108 vacc0123p0 = vfmaq_f32(vacc0123p0, vi2x0123, vk2x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma()
115 vacc0123p0 = vfmaq_f32(vacc0123p0, vi3x0123, vk3x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma()
122 vacc0123p0 = vfmaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma()
129 vacc0123p0 = vfmaq_f32(vacc0123p0, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma()
136 vacc0123p0 = vfmaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma()
143 vacc0123p0 = vfmaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma()
150 vacc0123p0 = vfmaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma()
[all …]
Dup8x9-minmax-sse.c85 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse() local
95 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse()
104 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse()
113 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi2x0123, vk2x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse()
122 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse()
131 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse()
140 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse()
149 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse()
158 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse()
167 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__sse()
[all …]
Dup8x9-minmax-wasmsimd-arm.c85 v128_t vacc0123p0 = wasm_v128_load(w); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm() local
95 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm()
104 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm()
113 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi2x0123, vk2x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm()
122 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm()
131 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi4x0123, vk4x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm()
140 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm()
149 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm()
158 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm()
167 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi8x0123, vk8x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm()
[all …]
Dup8x9-minmax-neon.c86 float32x4_t vacc0123p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up8x9__neon() local
94 vacc0123p0 = vmlaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon()
101 vacc0123p0 = vmlaq_f32(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon()
108 vacc0123p0 = vmlaq_f32(vacc0123p0, vi2x0123, vk2x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon()
115 vacc0123p0 = vmlaq_f32(vacc0123p0, vi3x0123, vk3x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon()
122 vacc0123p0 = vmlaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon()
129 vacc0123p0 = vmlaq_f32(vacc0123p0, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon()
136 vacc0123p0 = vmlaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon()
143 vacc0123p0 = vmlaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon()
150 vacc0123p0 = vmlaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_minmax_ukernel_up8x9__neon()
[all …]
Dup8x9-wasmsimd.c83 v128_t vacc0123p0 = wasm_v128_load(w); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd() local
93 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd()
102 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd()
111 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi2x0123, vk2x0123)); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd()
120 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd()
129 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd()
138 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd()
147 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd()
156 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd()
165 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi8x0123, vk8x0123)); in xnn_f32_dwconv_ukernel_up8x9__wasmsimd()
[all …]
Dup4x9-minmax-wasmsimd-x86.c85 v128_t vacc0123p0 = wasm_v128_load(w); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86() local
92 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86()
98 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86()
104 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi2x0123, vk2x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86()
110 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86()
116 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi4x0123, vk4x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86()
122 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86()
128 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86()
134 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86()
140 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi8x0123, vk8x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86()
[all …]
Dup4x9-wasmsimd.c83 v128_t vacc0123p0 = wasm_v128_load(w); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd() local
90 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd()
96 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd()
102 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi2x0123, vk2x0123)); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd()
108 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd()
114 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd()
120 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd()
126 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd()
132 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd()
138 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi8x0123, vk8x0123)); in xnn_f32_dwconv_ukernel_up4x9__wasmsimd()
[all …]
Dup4x9-minmax-wasmsimd-arm.c85 v128_t vacc0123p0 = wasm_v128_load(w); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm() local
92 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm()
98 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm()
104 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi2x0123, vk2x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm()
110 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm()
116 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi4x0123, vk4x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm()
122 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm()
128 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm()
134 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm()
140 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi8x0123, vk8x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm()
[all …]
Dup4x9-minmax-sse.c85 __m128 vacc0123p0 = _mm_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse() local
92 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse()
98 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse()
104 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi2x0123, vk2x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse()
110 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse()
116 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse()
122 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse()
128 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi6x0123, vk6x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse()
134 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi7x0123, vk7x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse()
140 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi8x0123, vk8x0123)); in xnn_f32_dwconv_minmax_ukernel_up4x9__sse()
[all …]
Dup4x9-minmax-neon.c86 float32x4_t vacc0123p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x9__neon() local
91 vacc0123p0 = vmlaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon()
95 vacc0123p0 = vmlaq_f32(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon()
99 vacc0123p0 = vmlaq_f32(vacc0123p0, vi2x0123, vk2x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon()
103 vacc0123p0 = vmlaq_f32(vacc0123p0, vi3x0123, vk3x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon()
107 vacc0123p0 = vmlaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon()
111 vacc0123p0 = vmlaq_f32(vacc0123p0, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon()
115 vacc0123p0 = vmlaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon()
119 vacc0123p0 = vmlaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon()
123 vacc0123p0 = vmlaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neon()
[all …]
Dup4x9-minmax-neonfma.c86 float32x4_t vacc0123p0 = vld1q_f32(w); w += 4; in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma() local
91 vacc0123p0 = vfmaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma()
95 vacc0123p0 = vfmaq_f32(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma()
99 vacc0123p0 = vfmaq_f32(vacc0123p0, vi2x0123, vk2x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma()
103 vacc0123p0 = vfmaq_f32(vacc0123p0, vi3x0123, vk3x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma()
107 vacc0123p0 = vfmaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma()
111 vacc0123p0 = vfmaq_f32(vacc0123p0, vi5x0123, vk5x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma()
115 vacc0123p0 = vfmaq_f32(vacc0123p0, vi6x0123, vk6x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma()
119 vacc0123p0 = vfmaq_f32(vacc0123p0, vi7x0123, vk7x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma()
123 vacc0123p0 = vfmaq_f32(vacc0123p0, vi8x0123, vk8x0123); in xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma()
[all …]
Dup8x4-minmax-wasmsimd-x86.c60 v128_t vacc0123p0 = wasm_v128_load(w); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86() local
70 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86()
79 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86()
88 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi2x0123, vk2x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86()
97 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi3x0123, vk3x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86()
103 v128_t vacc0123 = wasm_v128_bitselect(vmin, vacc0123p0, wasm_f32x4_lt(vacc0123p0, vmin)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86()
114 v128_t vacc0123p0 = wasm_v128_load(w); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86() local
120 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi0x0123, vk0x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86()
126 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi1x0123, vk1x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86()
132 vacc0123p0 = wasm_f32x4_add(vacc0123p0, wasm_f32x4_mul(vi2x0123, vk2x0123)); in xnn_f32_dwconv_minmax_ukernel_up8x4__wasmsimd_x86()
[all …]

123