Home
last modified time | relevance | path

Searched refs:vo2 (Results 1 – 25 of 109) sorted by relevance

12345

/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D3x3p1-minmax-wasmsimd-x86-loadsplat-3x4.c168 v128_t vo2 = wasm_v128_bitselect(vmin, vo2p0, wasm_f32x4_lt(vo2p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_3x4() local
171 vo2 = wasm_v128_bitselect(vo2, vmax, wasm_f32x4_le(vo2, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_3x4()
173 wasm_v128_store(o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_3x4()
235 v128_t vo2 = wasm_v128_bitselect(vmin, vo2p0, wasm_f32x4_lt(vo2p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_3x4() local
238 vo2 = wasm_v128_bitselect(vo2, vmax, wasm_f32x4_le(vo2, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_3x4()
241 wasm_v128_store(o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_3x4()
249 *((double*) o2) = wasm_f64x2_extract_lane(vo2, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_3x4()
258 vo2 = wasm_v32x4_shuffle(vo2, vo2, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_3x4()
261 *o2 = wasm_f32x4_extract_lane(vo2, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_3x4()
D3x3p1-minmax-scalar-3x1.c134 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() local
138 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
140 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
170 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() local
174 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
176 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
D3x3p1-minmax-wasmsimd-x86-splat-3x4.c157 v128_t vo2 = wasm_v128_bitselect(vmin, vo2p0, wasm_f32x4_lt(vo2p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_3x4() local
160 vo2 = wasm_v128_bitselect(vo2, vmax, wasm_f32x4_le(vo2, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_3x4()
162 wasm_v128_store(o2, vo2); o2 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_3x4()
231 v128_t vo2 = wasm_v128_bitselect(vmin, vo2p0, wasm_f32x4_lt(vo2p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_3x4() local
234 vo2 = wasm_v128_bitselect(vo2, vmax, wasm_f32x4_le(vo2, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_3x4()
237 wasm_v128_store(o2, vo2); o2 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_3x4()
242 *((double*) o2) = wasm_f64x2_extract_lane(vo2, 0); o2 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_3x4()
248 vo2 = wasm_v32x4_shuffle(vo2, vo2, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_3x4()
251 *o2 = wasm_f32x4_extract_lane(vo2, 0); o2 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_3x4()
D3x3p1-minmax-wasmsimd-arm-loadsplat-3x4.c168 v128_t vo2 = wasm_f32x4_max(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_3x4() local
171 vo2 = wasm_f32x4_min(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_3x4()
173 wasm_v128_store(o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_3x4()
235 v128_t vo2 = wasm_f32x4_max(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_3x4() local
238 vo2 = wasm_f32x4_min(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_3x4()
241 wasm_v128_store(o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_3x4()
249 *((double*) o2) = wasm_f64x2_extract_lane(vo2, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_3x4()
258 vo2 = wasm_v32x4_shuffle(vo2, vo2, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_3x4()
261 *o2 = wasm_f32x4_extract_lane(vo2, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_3x4()
D3x3p1-minmax-ssse3-3x4.c161 __m128 vo2 = _mm_max_ps(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4() local
165 vo2 = _mm_min_ps(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
167 _mm_storeu_ps(o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
230 __m128 vo2 = _mm_max_ps(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4() local
234 vo2 = _mm_min_ps(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
237 _mm_storeu_ps(o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
245 _mm_storel_pi((__m64*) o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
254 vo2 = _mm_movehl_ps(vo2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
257 _mm_store_ss(o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
D3x3s2p1-minmax-scalar-3x1.c158 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1() local
162 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1()
164 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1()
202 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1() local
206 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1()
208 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1()
D3x3s2p1-minmax-wasmsimd-x86-loadsplat-3x4.c200 v128_t vo2 = wasm_v128_bitselect(vmin, vo2p0, wasm_f32x4_lt(vo2p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4() local
203 vo2 = wasm_v128_bitselect(vo2, vmax, wasm_f32x4_le(vo2, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
205 wasm_v128_store(o2, vo2); o2 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
293 v128_t vo2 = wasm_v128_bitselect(vmin, vo2p0, wasm_f32x4_lt(vo2p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4() local
296 vo2 = wasm_v128_bitselect(vo2, vmax, wasm_f32x4_le(vo2, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
300 wasm_v128_store(o2, vo2); o2 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
305 *((double*) o2) = wasm_f64x2_extract_lane(vo2, 0); o2 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
311 vo2 = wasm_v32x4_shuffle(vo2, vo2, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
314 *o2 = wasm_f32x4_extract_lane(vo2, 0); o2 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
D3x3p1-minmax-wasmsimd-x86-loadsplat-4x4.c192 v128_t vo2 = wasm_v128_bitselect(vmin, vo2p0, wasm_f32x4_lt(vo2p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4() local
196 vo2 = wasm_v128_bitselect(vo2, vmax, wasm_f32x4_le(vo2, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4()
201 wasm_v128_store(o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4()
275 v128_t vo2 = wasm_v128_bitselect(vmin, vo2p0, wasm_f32x4_lt(vo2p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4() local
279 vo2 = wasm_v128_bitselect(vo2, vmax, wasm_f32x4_le(vo2, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4()
285 wasm_v128_store(o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4()
295 *((double*) o2) = wasm_f64x2_extract_lane(vo2, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4()
304 vo2 = wasm_v32x4_shuffle(vo2, vo2, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4()
310 *o2 = wasm_f32x4_extract_lane(vo2, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4()
D3x3p1-minmax-scalar-4x1.c154 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() local
159 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
163 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
199 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() local
204 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
208 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
D3x3p1-minmax-wasmsimd-arm-splat-3x4.c157 v128_t vo2 = wasm_f32x4_max(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_3x4() local
160 vo2 = wasm_f32x4_min(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_3x4()
162 wasm_v128_store(o2, vo2); o2 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_3x4()
231 v128_t vo2 = wasm_f32x4_max(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_3x4() local
234 vo2 = wasm_f32x4_min(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_3x4()
237 wasm_v128_store(o2, vo2); o2 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_3x4()
242 *((double*) o2) = wasm_f64x2_extract_lane(vo2, 0); o2 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_3x4()
248 vo2 = wasm_v32x4_shuffle(vo2, vo2, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_3x4()
251 *o2 = wasm_f32x4_extract_lane(vo2, 0); o2 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_3x4()
D3x3p1-minmax-sse-3x4.c203 __m128 vo2 = _mm_max_ps(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4() local
207 vo2 = _mm_min_ps(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4()
209 _mm_storeu_ps(o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4()
304 __m128 vo2 = _mm_max_ps(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4() local
308 vo2 = _mm_min_ps(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4()
311 _mm_storeu_ps(o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4()
319 _mm_storel_pi((__m64*) o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4()
328 vo2 = _mm_movehl_ps(vo2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4()
331 _mm_store_ss(o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4()
D3x3p1-minmax-wasmsimd-x86-splat-4x4.c180 v128_t vo2 = wasm_v128_bitselect(vmin, vo2p0, wasm_f32x4_lt(vo2p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_4x4() local
184 vo2 = wasm_v128_bitselect(vo2, vmax, wasm_f32x4_le(vo2, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_4x4()
188 wasm_v128_store(o2, vo2); o2 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_4x4()
270 v128_t vo2 = wasm_v128_bitselect(vmin, vo2p0, wasm_f32x4_lt(vo2p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_4x4() local
274 vo2 = wasm_v128_bitselect(vo2, vmax, wasm_f32x4_le(vo2, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_4x4()
279 wasm_v128_store(o2, vo2); o2 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_4x4()
285 *((double*) o2) = wasm_f64x2_extract_lane(vo2, 0); o2 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_4x4()
291 vo2 = wasm_v32x4_shuffle(vo2, vo2, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_4x4()
296 *o2 = wasm_f32x4_extract_lane(vo2, 0); o2 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_4x4()
D3x3s2p1-minmax-wasmsimd-x86-splat-3x4.c190 v128_t vo2 = wasm_v128_bitselect(vmin, vo2p0, wasm_f32x4_lt(vo2p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4() local
193 vo2 = wasm_v128_bitselect(vo2, vmax, wasm_f32x4_le(vo2, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4()
195 wasm_v128_store(o2, vo2); o2 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4()
283 v128_t vo2 = wasm_v128_bitselect(vmin, vo2p0, wasm_f32x4_lt(vo2p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4() local
286 vo2 = wasm_v128_bitselect(vo2, vmax, wasm_f32x4_le(vo2, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4()
290 wasm_v128_store(o2, vo2); o2 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4()
295 *((double*) o2) = wasm_f64x2_extract_lane(vo2, 0); o2 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4()
301 vo2 = wasm_v32x4_shuffle(vo2, vo2, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4()
304 *o2 = wasm_f32x4_extract_lane(vo2, 0); o2 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4()
D3x3p1-minmax-scalar-5x1.c174 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() local
180 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
186 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
228 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() local
234 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
240 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
D3x3s2p1-minmax-scalar-4x1.c187 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1() local
192 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
196 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
242 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1() local
247 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
251 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
D5x5p2-minmax-scalar-3x1.c248 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local
252 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
254 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
351 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local
355 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
357 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
416 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local
420 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
422 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
D3x3s2p1-minmax-wasmsimd-arm-splat-3x4.c190 v128_t vo2 = wasm_f32x4_max(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4() local
193 vo2 = wasm_f32x4_min(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
195 wasm_v128_store(o2, vo2); o2 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
283 v128_t vo2 = wasm_f32x4_max(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4() local
286 vo2 = wasm_f32x4_min(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
290 wasm_v128_store(o2, vo2); o2 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
295 *((double*) o2) = wasm_f64x2_extract_lane(vo2, 0); o2 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
301 vo2 = wasm_v32x4_shuffle(vo2, vo2, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
304 *o2 = wasm_f32x4_extract_lane(vo2, 0); o2 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
D3x3p1-minmax-ssse3-4x4.c185 __m128 vo2 = _mm_max_ps(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4() local
190 vo2 = _mm_min_ps(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
195 _mm_storeu_ps(o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
270 __m128 vo2 = _mm_max_ps(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4() local
275 vo2 = _mm_min_ps(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
281 _mm_storeu_ps(o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
291 _mm_storel_pi((__m64*) o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
300 vo2 = _mm_movehl_ps(vo2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
306 _mm_store_ss(o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
D3x3s2p1-minmax-wasmsimd-arm-loadsplat-3x4.c200 v128_t vo2 = wasm_f32x4_max(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4() local
203 vo2 = wasm_f32x4_min(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
205 wasm_v128_store(o2, vo2); o2 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
293 v128_t vo2 = wasm_f32x4_max(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4() local
296 vo2 = wasm_f32x4_min(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
300 wasm_v128_store(o2, vo2); o2 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
305 *((double*) o2) = wasm_f64x2_extract_lane(vo2, 0); o2 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
311 vo2 = wasm_v32x4_shuffle(vo2, vo2, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
314 *o2 = wasm_f32x4_extract_lane(vo2, 0); o2 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
D3x3p1-minmax-wasmsimd-arm-loadsplat-4x4.c192 v128_t vo2 = wasm_f32x4_max(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_4x4() local
196 vo2 = wasm_f32x4_min(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_4x4()
201 wasm_v128_store(o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_4x4()
275 v128_t vo2 = wasm_f32x4_max(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_4x4() local
279 vo2 = wasm_f32x4_min(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_4x4()
285 wasm_v128_store(o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_4x4()
295 *((double*) o2) = wasm_f64x2_extract_lane(vo2, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_4x4()
304 vo2 = wasm_v32x4_shuffle(vo2, vo2, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_4x4()
310 *o2 = wasm_f32x4_extract_lane(vo2, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_4x4()
D5x5p2-minmax-scalar-3x1-acc2.c251 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() local
255 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
257 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
357 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() local
361 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
363 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
425 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() local
429 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
431 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
D5x5s2p2-minmax-scalar-3x1-acc2.c284 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() local
288 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
290 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
375 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() local
379 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
381 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
439 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() local
443 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
445 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
D5x5s2p2-minmax-scalar-3x1.c281 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() local
285 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
287 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
369 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() local
373 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
375 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
430 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() local
434 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
436 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
D3x3p1-minmax-neonfma-3x4.c154 float32x4_t vo2 = vmaxq_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_3x4() local
158 vo2 = vminq_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_3x4()
160 vst1q_f32(o2, vo2); o2 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_3x4()
230 float32x4_t vo2 = vmaxq_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_3x4() local
234 vo2 = vminq_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_3x4()
237 vst1q_f32(o2, vo2); o2 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_3x4()
243 float32x2_t vo2_lo = vget_low_f32(vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_3x4()
251 vo2_lo = vget_high_f32(vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_3x4()
D3x3p1-minmax-neon-3x4.c154 float32x4_t vo2 = vmaxq_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_3x4() local
158 vo2 = vminq_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_3x4()
160 vst1q_f32(o2, vo2); o2 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_3x4()
230 float32x4_t vo2 = vmaxq_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_3x4() local
234 vo2 = vminq_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_3x4()
237 vst1q_f32(o2, vo2); o2 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_3x4()
243 float32x2_t vo2_lo = vget_low_f32(vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_3x4()
251 vo2_lo = vget_high_f32(vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_3x4()

12345