Home
last modified time | relevance | path

Searched refs:vo2 (Results 1 – 25 of 125) sorted by relevance

12345

/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D3x3p1-minmax-scalar-3x1.c134 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() local
138 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
140 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
170 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() local
174 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
176 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
D3x3p1-minmax-wasmsimd-x86-loadsplat-3x4.c166 v128_t vo2 = wasm_f32x4_pmax(vmin, vo2p0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_3x4() local
169 vo2 = wasm_f32x4_pmin(vmax, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_3x4()
171 wasm_v128_store(o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_3x4()
234 v128_t vo2 = wasm_f32x4_pmax(vmin, vo2p0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_3x4() local
237 vo2 = wasm_f32x4_pmin(vmax, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_3x4()
240 wasm_v128_store(o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_3x4()
248 *((double*) o2) = wasm_f64x2_extract_lane(vo2, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_3x4()
257 vo2 = wasm_v32x4_shuffle(vo2, vo2, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_3x4()
260 *o2 = wasm_f32x4_extract_lane(vo2, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_3x4()
D3x3p1-minmax-wasmsimd-arm-loadsplat-3x4.c166 v128_t vo2 = wasm_f32x4_max(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_3x4() local
169 vo2 = wasm_f32x4_min(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_3x4()
171 wasm_v128_store(o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_3x4()
234 v128_t vo2 = wasm_f32x4_max(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_3x4() local
237 vo2 = wasm_f32x4_min(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_3x4()
240 wasm_v128_store(o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_3x4()
248 *((double*) o2) = wasm_f64x2_extract_lane(vo2, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_3x4()
257 vo2 = wasm_v32x4_shuffle(vo2, vo2, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_3x4()
260 *o2 = wasm_f32x4_extract_lane(vo2, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_3x4()
D3x3p1-minmax-ssse3-3x4.c161 __m128 vo2 = _mm_max_ps(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4() local
165 vo2 = _mm_min_ps(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
167 _mm_storeu_ps(o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
230 __m128 vo2 = _mm_max_ps(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4() local
234 vo2 = _mm_min_ps(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
237 _mm_storeu_ps(o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
245 _mm_storel_pi((__m64*) o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
254 vo2 = _mm_movehl_ps(vo2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
257 _mm_store_ss(o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
D3x3s2p1-minmax-scalar-3x1.c158 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1() local
162 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1()
164 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1()
202 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1() local
206 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1()
208 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1()
D3x3p1-minmax-wasmsimd-x86-splat-3x4.c155 v128_t vo2 = wasm_f32x4_pmax(vmin, vo2p0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_3x4() local
158 vo2 = wasm_f32x4_pmin(vmax, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_3x4()
160 wasm_v128_store(o2, vo2); o2 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_3x4()
230 v128_t vo2 = wasm_f32x4_pmax(vmin, vo2p0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_3x4() local
233 vo2 = wasm_f32x4_pmin(vmax, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_3x4()
236 wasm_v128_store(o2, vo2); o2 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_3x4()
241 *((double*) o2) = wasm_f64x2_extract_lane(vo2, 0); o2 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_3x4()
247 vo2 = wasm_v32x4_shuffle(vo2, vo2, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_3x4()
250 *o2 = wasm_f32x4_extract_lane(vo2, 0); o2 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_3x4()
D3x3p1-minmax-wasmsimd-arm-splat-3x4.c155 v128_t vo2 = wasm_f32x4_max(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_3x4() local
158 vo2 = wasm_f32x4_min(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_3x4()
160 wasm_v128_store(o2, vo2); o2 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_3x4()
230 v128_t vo2 = wasm_f32x4_max(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_3x4() local
233 vo2 = wasm_f32x4_min(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_3x4()
236 wasm_v128_store(o2, vo2); o2 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_3x4()
241 *((double*) o2) = wasm_f64x2_extract_lane(vo2, 0); o2 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_3x4()
247 vo2 = wasm_v32x4_shuffle(vo2, vo2, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_3x4()
250 *o2 = wasm_f32x4_extract_lane(vo2, 0); o2 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_3x4()
D3x3p1-minmax-sse-3x4.c203 __m128 vo2 = _mm_max_ps(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4() local
207 vo2 = _mm_min_ps(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4()
209 _mm_storeu_ps(o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4()
304 __m128 vo2 = _mm_max_ps(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4() local
308 vo2 = _mm_min_ps(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4()
311 _mm_storeu_ps(o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4()
319 _mm_storel_pi((__m64*) o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4()
328 vo2 = _mm_movehl_ps(vo2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4()
331 _mm_store_ss(o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4()
D3x3p1-minmax-scalar-4x1.c154 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() local
159 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
163 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
199 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() local
204 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
208 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
D5x5p2-minmax-scalar-3x1.c248 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local
252 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
254 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
351 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local
355 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
357 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
416 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local
420 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
422 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
D3x3s2p1-minmax-scalar-4x1.c187 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1() local
192 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
196 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
242 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1() local
247 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
251 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
D3x3p1-minmax-scalar-5x1.c174 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() local
180 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
186 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
228 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() local
234 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
240 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
D3x3s2p1-minmax-wasmsimd-x86-loadsplat-3x4.c198 v128_t vo2 = wasm_f32x4_pmax(vmin, vo2p0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4() local
201 vo2 = wasm_f32x4_pmin(vmax, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
203 wasm_v128_store(o2, vo2); o2 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
291 v128_t vo2 = wasm_f32x4_pmax(vmin, vo2p0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4() local
294 vo2 = wasm_f32x4_pmin(vmax, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
298 wasm_v128_store(o2, vo2); o2 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
303 *((double*) o2) = wasm_f64x2_extract_lane(vo2, 0); o2 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
309 vo2 = wasm_v32x4_shuffle(vo2, vo2, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
312 *o2 = wasm_f32x4_extract_lane(vo2, 0); o2 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
D3x3p1-minmax-wasmsimd-x86-loadsplat-4x4.c190 v128_t vo2 = wasm_f32x4_pmax(vmin, vo2p0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4() local
194 vo2 = wasm_f32x4_pmin(vmax, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4()
199 wasm_v128_store(o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4()
274 v128_t vo2 = wasm_f32x4_pmax(vmin, vo2p0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4() local
278 vo2 = wasm_f32x4_pmin(vmax, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4()
284 wasm_v128_store(o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4()
294 *((double*) o2) = wasm_f64x2_extract_lane(vo2, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4()
303 vo2 = wasm_v32x4_shuffle(vo2, vo2, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4()
309 *o2 = wasm_f32x4_extract_lane(vo2, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4()
D3x3p1-minmax-wasmsimd-arm-loadsplat-4x4.c190 v128_t vo2 = wasm_f32x4_max(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_4x4() local
194 vo2 = wasm_f32x4_min(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_4x4()
199 wasm_v128_store(o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_4x4()
274 v128_t vo2 = wasm_f32x4_max(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_4x4() local
278 vo2 = wasm_f32x4_min(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_4x4()
284 wasm_v128_store(o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_4x4()
294 *((double*) o2) = wasm_f64x2_extract_lane(vo2, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_4x4()
303 vo2 = wasm_v32x4_shuffle(vo2, vo2, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_4x4()
309 *o2 = wasm_f32x4_extract_lane(vo2, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_4x4()
D3x3s2p1-minmax-wasmsimd-x86-splat-3x4.c188 v128_t vo2 = wasm_f32x4_pmax(vmin, vo2p0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4() local
191 vo2 = wasm_f32x4_pmin(vmax, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4()
193 wasm_v128_store(o2, vo2); o2 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4()
281 v128_t vo2 = wasm_f32x4_pmax(vmin, vo2p0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4() local
284 vo2 = wasm_f32x4_pmin(vmax, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4()
288 wasm_v128_store(o2, vo2); o2 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4()
293 *((double*) o2) = wasm_f64x2_extract_lane(vo2, 0); o2 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4()
299 vo2 = wasm_v32x4_shuffle(vo2, vo2, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4()
302 *o2 = wasm_f32x4_extract_lane(vo2, 0); o2 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_3x4()
D3x3p1-minmax-ssse3-4x4.c185 __m128 vo2 = _mm_max_ps(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4() local
190 vo2 = _mm_min_ps(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
195 _mm_storeu_ps(o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
270 __m128 vo2 = _mm_max_ps(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4() local
275 vo2 = _mm_min_ps(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
281 _mm_storeu_ps(o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
291 _mm_storel_pi((__m64*) o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
300 vo2 = _mm_movehl_ps(vo2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
306 _mm_store_ss(o2, vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
D3x3s2p1-minmax-wasmsimd-arm-splat-3x4.c188 v128_t vo2 = wasm_f32x4_max(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4() local
191 vo2 = wasm_f32x4_min(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
193 wasm_v128_store(o2, vo2); o2 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
281 v128_t vo2 = wasm_f32x4_max(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4() local
284 vo2 = wasm_f32x4_min(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
288 wasm_v128_store(o2, vo2); o2 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
293 *((double*) o2) = wasm_f64x2_extract_lane(vo2, 0); o2 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
299 vo2 = wasm_v32x4_shuffle(vo2, vo2, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
302 *o2 = wasm_f32x4_extract_lane(vo2, 0); o2 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
D3x3s2p1-minmax-wasmsimd-arm-loadsplat-3x4.c198 v128_t vo2 = wasm_f32x4_max(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4() local
201 vo2 = wasm_f32x4_min(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
203 wasm_v128_store(o2, vo2); o2 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
291 v128_t vo2 = wasm_f32x4_max(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4() local
294 vo2 = wasm_f32x4_min(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
298 wasm_v128_store(o2, vo2); o2 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
303 *((double*) o2) = wasm_f64x2_extract_lane(vo2, 0); o2 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
309 vo2 = wasm_v32x4_shuffle(vo2, vo2, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
312 *o2 = wasm_f32x4_extract_lane(vo2, 0); o2 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
D5x5s2p2-minmax-scalar-3x1.c281 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() local
285 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
287 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
369 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() local
373 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
375 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
430 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() local
434 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
436 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
D5x5s2p2-minmax-scalar-3x1-acc2.c284 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() local
288 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
290 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
375 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() local
379 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
381 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
439 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() local
443 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
445 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
D5x5p2-minmax-scalar-3x1-acc2.c251 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() local
255 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
257 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
357 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() local
361 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
363 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
425 float vo2 = math_max_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() local
429 vo2 = math_min_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
431 *o2++ = vo2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
D3x3p1-minmax-neon-3x4.c154 float32x4_t vo2 = vmaxq_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_3x4() local
158 vo2 = vminq_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_3x4()
160 vst1q_f32(o2, vo2); o2 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_3x4()
230 float32x4_t vo2 = vmaxq_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_3x4() local
234 vo2 = vminq_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_3x4()
237 vst1q_f32(o2, vo2); o2 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_3x4()
243 float32x2_t vo2_lo = vget_low_f32(vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_3x4()
251 vo2_lo = vget_high_f32(vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_3x4()
D3x3p1-minmax-neonfma-3x4.c154 float32x4_t vo2 = vmaxq_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_3x4() local
158 vo2 = vminq_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_3x4()
160 vst1q_f32(o2, vo2); o2 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_3x4()
230 float32x4_t vo2 = vmaxq_f32(vo2p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_3x4() local
234 vo2 = vminq_f32(vo2, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_3x4()
237 vst1q_f32(o2, vo2); o2 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_3x4()
243 float32x2_t vo2_lo = vget_low_f32(vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_3x4()
251 vo2_lo = vget_high_f32(vo2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_3x4()
/external/XNNPACK/src/f16-dwconv2d-chw/gen/
D3x3s2p1-minmax-neonfp16arith-3x4.c158 float16x4_t vo2 = vmax_f16(vo2p0, vmin); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_3x4() local
162 vo2 = vmin_f16(vo2, vmax); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_3x4()
164 vst1_f16(o2, vo2); o2 += 4; in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_3x4()
245 float16x4_t vo2 = vmax_f16(vo2p0, vmin); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_3x4() local
249 vo2 = vmin_f16(vo2, vmax); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_3x4()
254 vst1_f16(o2, vo2); o2 += 4; in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_3x4()
259 vst1_lane_u32((void*) o2, vreinterpret_u32_f16(vo2), 0); o2 += 2; in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_3x4()
265 vo2 = vext_f16(vo2, vo2, 2); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_3x4()
268 vst1_lane_f16(o2, vo2, 0); o2 += 1; in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_3x4()

12345