Home
last modified time | relevance | path

Searched refs:vo0 (Results 1 – 25 of 388) sorted by relevance

12345678910>>...16

/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D3x3p1-minmax-wasmsimd-arm-splat-1x4-acc3.c109 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc3() local
110 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc3()
112 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc3()
154 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc3() local
155 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc3()
158 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc3()
161 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc3()
163 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc3()
166 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc3()
D3x3p1-minmax-wasmsimd-arm-splat-1x4-acc4.c110 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc4() local
111 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc4()
113 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc4()
156 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc4() local
157 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc4()
160 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc4()
163 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc4()
165 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc4()
168 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc4()
D3x3p1-minmax-wasmsimd-x86-loadsplat-1x4-acc3.c118 v128_t vo0 = wasm_f32x4_pmax(vmin, vo0p0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc3() local
119 vo0 = wasm_f32x4_pmin(vmax, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc3()
121 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc3()
156 v128_t vo0 = wasm_f32x4_pmax(vmin, vo0p0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc3() local
157 vo0 = wasm_f32x4_pmin(vmax, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc3()
160 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc3()
164 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc3()
167 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc3()
170 *o0 = wasm_f32x4_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc3()
D3x3p1-minmax-wasmsimd-arm-loadsplat-1x4-acc2.c117 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc2() local
118 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc2()
120 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc2()
154 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc2() local
155 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc2()
158 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc2()
162 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc2()
165 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc2()
168 *o0 = wasm_f32x4_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc2()
D3x3p1-minmax-wasmsimd-x86-loadsplat-1x4.c116 v128_t vo0 = wasm_f32x4_pmax(vmin, vo0p0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4() local
117 vo0 = wasm_f32x4_pmin(vmax, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4()
119 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4()
152 v128_t vo0 = wasm_f32x4_pmax(vmin, vo0p0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4() local
153 vo0 = wasm_f32x4_pmin(vmax, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4()
156 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4()
160 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4()
163 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4()
166 *o0 = wasm_f32x4_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4()
D3x3p1-minmax-wasmsimd-x86-splat-1x4-acc4.c110 v128_t vo0 = wasm_f32x4_pmax(vmin, vo0p0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc4() local
111 vo0 = wasm_f32x4_pmin(vmax, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc4()
113 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc4()
156 v128_t vo0 = wasm_f32x4_pmax(vmin, vo0p0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc4() local
157 vo0 = wasm_f32x4_pmin(vmax, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc4()
160 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc4()
163 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc4()
165 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc4()
168 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc4()
D3x3p1-minmax-wasmsimd-arm-splat-1x4.c107 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4() local
108 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4()
110 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4()
150 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4() local
151 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4()
154 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4()
157 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4()
159 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4()
162 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4()
D3x3p1-minmax-wasmsimd-x86-loadsplat-1x4-acc2.c117 v128_t vo0 = wasm_f32x4_pmax(vmin, vo0p0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc2() local
118 vo0 = wasm_f32x4_pmin(vmax, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc2()
120 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc2()
154 v128_t vo0 = wasm_f32x4_pmax(vmin, vo0p0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc2() local
155 vo0 = wasm_f32x4_pmin(vmax, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc2()
158 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc2()
162 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc2()
165 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc2()
168 *o0 = wasm_f32x4_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc2()
D3x3p1-minmax-wasmsimd-x86-splat-1x4-acc2.c108 v128_t vo0 = wasm_f32x4_pmax(vmin, vo0p0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc2() local
109 vo0 = wasm_f32x4_pmin(vmax, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc2()
111 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc2()
152 v128_t vo0 = wasm_f32x4_pmax(vmin, vo0p0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc2() local
153 vo0 = wasm_f32x4_pmin(vmax, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc2()
156 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc2()
159 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc2()
161 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc2()
164 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc2()
D3x3p1-minmax-wasmsimd-arm-splat-1x4-acc2.c108 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc2() local
109 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc2()
111 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc2()
152 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc2() local
153 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc2()
156 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc2()
159 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc2()
161 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc2()
164 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc2()
D3x3p1-minmax-wasmsimd-x86-splat-1x4-acc3.c109 v128_t vo0 = wasm_f32x4_pmax(vmin, vo0p0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc3() local
110 vo0 = wasm_f32x4_pmin(vmax, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc3()
112 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc3()
154 v128_t vo0 = wasm_f32x4_pmax(vmin, vo0p0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc3() local
155 vo0 = wasm_f32x4_pmin(vmax, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc3()
158 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc3()
161 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc3()
163 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc3()
166 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc3()
D3x3p1-minmax-ssse3-1x4.c111 __m128 vo0 = _mm_max_ps(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4() local
113 vo0 = _mm_min_ps(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
115 _mm_storeu_ps(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
148 __m128 vo0 = _mm_max_ps(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4() local
150 vo0 = _mm_min_ps(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
153 _mm_storeu_ps(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
157 _mm_storel_pi((__m64*) o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
160 vo0 = _mm_movehl_ps(vo0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
163 _mm_store_ss(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
D3x3p1-minmax-wasmsimd-arm-loadsplat-1x4-acc3.c118 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc3() local
119 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc3()
121 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc3()
156 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc3() local
157 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc3()
160 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc3()
164 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc3()
167 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc3()
170 *o0 = wasm_f32x4_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc3()
D3x3p1-minmax-wasmsimd-arm-loadsplat-1x4.c116 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4() local
117 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4()
119 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4()
152 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4() local
153 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4()
156 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4()
160 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4()
163 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4()
166 *o0 = wasm_f32x4_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4()
D3x3p1-minmax-wasmsimd-x86-splat-1x4.c107 v128_t vo0 = wasm_f32x4_pmax(vmin, vo0p0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4() local
108 vo0 = wasm_f32x4_pmin(vmax, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4()
110 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4()
150 v128_t vo0 = wasm_f32x4_pmax(vmin, vo0p0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4() local
151 vo0 = wasm_f32x4_pmin(vmax, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4()
154 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4()
157 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4()
159 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4()
162 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4()
D3x3p1-minmax-scalar-1x1-acc3.c94 float vo0 = math_max_f32(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3() local
96 vo0 = math_min_f32(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3()
98 *o0++ = vo0; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3()
114 float vo0 = math_max_f32(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3() local
116 vo0 = math_min_f32(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3()
118 *o0++ = vo0; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3()
D3x3p1-minmax-scalar-1x1-acc2.c93 float vo0 = math_max_f32(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc2() local
95 vo0 = math_min_f32(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc2()
97 *o0++ = vo0; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc2()
112 float vo0 = math_max_f32(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc2() local
114 vo0 = math_min_f32(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc2()
116 *o0++ = vo0; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc2()
D3x3p1-minmax-scalar-1x1.c92 float vo0 = math_max_f32(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1() local
94 vo0 = math_min_f32(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1()
96 *o0++ = vo0; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1()
110 float vo0 = math_max_f32(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1() local
112 vo0 = math_min_f32(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1()
114 *o0++ = vo0; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1()
D3x3p1-minmax-ssse3-1x4-acc3.c113 __m128 vo0 = _mm_max_ps(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3() local
115 vo0 = _mm_min_ps(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
117 _mm_storeu_ps(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
152 __m128 vo0 = _mm_max_ps(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3() local
154 vo0 = _mm_min_ps(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
157 _mm_storeu_ps(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
161 _mm_storel_pi((__m64*) o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
164 vo0 = _mm_movehl_ps(vo0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
167 _mm_store_ss(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
D3x3p1-minmax-ssse3-1x4-acc2.c112 __m128 vo0 = _mm_max_ps(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2() local
114 vo0 = _mm_min_ps(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
116 _mm_storeu_ps(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
150 __m128 vo0 = _mm_max_ps(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2() local
152 vo0 = _mm_min_ps(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
155 _mm_storeu_ps(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
159 _mm_storel_pi((__m64*) o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
162 vo0 = _mm_movehl_ps(vo0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
165 _mm_store_ss(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
D3x3p1-minmax-wasmsimd-x86-loadsplat-1x4-acc4.c119 v128_t vo0 = wasm_f32x4_pmax(vmin, vo0p0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc4() local
120 vo0 = wasm_f32x4_pmin(vmax, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc4()
122 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc4()
158 v128_t vo0 = wasm_f32x4_pmax(vmin, vo0p0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc4() local
159 vo0 = wasm_f32x4_pmin(vmax, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc4()
162 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc4()
166 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc4()
169 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc4()
172 *o0 = wasm_f32x4_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc4()
/external/XNNPACK/src/f16-dwconv2d-chw/gen/
D3x3s2p1-minmax-neonfp16arith-1x4.c99 float16x4_t vo0 = vmax_f16(vo0p0, vmin); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4() local
101 vo0 = vmin_f16(vo0, vmax); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4()
103 vst1_f16(o0, vo0); o0 += 4; in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4()
144 float16x4_t vo0 = vmax_f16(vo0p0, vmin); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4() local
146 vo0 = vmin_f16(vo0, vmax); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4()
151 vst1_f16(o0, vo0); o0 += 4; in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4()
154 vst1_lane_u32((void*) o0, vreinterpret_u32_f16(vo0), 0); o0 += 2; in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4()
156 vo0 = vext_f16(vo0, vo0, 2); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4()
159 vst1_lane_f16(o0, vo0, 0); o0 += 1; in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4()
D3x3s2p1-minmax-neonfp16arith-1x4-acc3.c101 float16x4_t vo0 = vmax_f16(vo0p0, vmin); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc3() local
103 vo0 = vmin_f16(vo0, vmax); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc3()
105 vst1_f16(o0, vo0); o0 += 4; in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc3()
148 float16x4_t vo0 = vmax_f16(vo0p0, vmin); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc3() local
150 vo0 = vmin_f16(vo0, vmax); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc3()
155 vst1_f16(o0, vo0); o0 += 4; in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc3()
158 vst1_lane_u32((void*) o0, vreinterpret_u32_f16(vo0), 0); o0 += 2; in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc3()
160 vo0 = vext_f16(vo0, vo0, 2); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc3()
163 vst1_lane_f16(o0, vo0, 0); o0 += 1; in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc3()
D3x3s2p1-minmax-neonfp16arith-1x4-acc2.c100 float16x4_t vo0 = vmax_f16(vo0p0, vmin); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc2() local
102 vo0 = vmin_f16(vo0, vmax); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc2()
104 vst1_f16(o0, vo0); o0 += 4; in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc2()
146 float16x4_t vo0 = vmax_f16(vo0p0, vmin); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc2() local
148 vo0 = vmin_f16(vo0, vmax); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc2()
153 vst1_f16(o0, vo0); o0 += 4; in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc2()
156 vst1_lane_u32((void*) o0, vreinterpret_u32_f16(vo0), 0); o0 += 2; in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc2()
158 vo0 = vext_f16(vo0, vo0, 2); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc2()
161 vst1_lane_f16(o0, vo0, 0); o0 += 1; in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc2()
D3x3s2p1-minmax-neonfp16arith-1x4-acc4.c102 float16x4_t vo0 = vmax_f16(vo0p0, vmin); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc4() local
104 vo0 = vmin_f16(vo0, vmax); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc4()
106 vst1_f16(o0, vo0); o0 += 4; in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc4()
150 float16x4_t vo0 = vmax_f16(vo0p0, vmin); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc4() local
152 vo0 = vmin_f16(vo0, vmax); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc4()
157 vst1_f16(o0, vo0); o0 += 4; in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc4()
160 vst1_lane_u32((void*) o0, vreinterpret_u32_f16(vo0), 0); o0 += 2; in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc4()
162 vo0 = vext_f16(vo0, vo0, 2); in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc4()
165 vst1_lane_f16(o0, vo0, 0); o0 += 1; in xnn_f16_dwconv2d_chw_ukernel_3x3s2p1__neonfp16arith_1x4_acc4()

12345678910>>...16