Home
last modified time | relevance | path

Searched refs:vi1x2 (Results 1 – 25 of 100) sorted by relevance

Page: 1 2 3 4

/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D5x5p2-minmax-scalar-2x1.c:99 float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() local
152 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
159 vo1p0 += vi1x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
160 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
170 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
247 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
254 vo1p0 += vi1x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
255 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
265 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
319 vo1p0 += vi1x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
[all …]
D5x5p2-minmax-scalar-1x1-acc4.c:91 float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() local
130 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
136 vo0p3 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
142 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
196 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
202 vo0p3 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
208 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
246 vo0p3 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
D5x5p2-minmax-scalar-1x1-acc3.c:91 float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() local
130 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
136 vo0p2 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
142 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
195 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
201 vo0p2 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
207 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
244 vo0p2 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
D5x5p2-minmax-scalar-1x1.c:91 float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() local
130 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
136 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
142 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
193 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
199 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
205 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
240 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
D5x5p2-minmax-scalar-1x1-acc2.c:91 float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() local
130 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
136 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
142 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
194 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
200 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
206 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
242 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
D5x5p2-minmax-scalar-2x1-acc3.c:99 float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() local
152 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
159 vo1p1 += vi1x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
160 vo0p2 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
170 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
251 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
258 vo1p1 += vi1x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
259 vo0p2 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
269 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
327 vo1p1 += vi1x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
[all …]
D5x5p2-minmax-scalar-2x1-acc2.c:99 float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() local
152 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
159 vo1p0 += vi1x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
160 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
170 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
249 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
256 vo1p0 += vi1x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
257 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
267 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
323 vo1p0 += vi1x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
[all …]
D5x5p2-minmax-scalar-1x1-acc5.c:91 float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() local
130 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
136 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
142 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
197 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
203 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
209 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
248 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
D5x5p2-minmax-scalar-3x1.c:107 float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local
174 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
182 vo1p0 += vi1x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
184 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
198 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
301 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
309 vo1p0 += vi1x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
311 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
325 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
398 vo1p0 += vi1x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
[all …]
D5x5p2-minmax-scalar-3x1-acc2.c:107 float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() local
174 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
182 vo1p0 += vi1x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
184 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
198 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
304 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
312 vo1p0 += vi1x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
314 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
328 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
404 vo1p0 += vi1x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
[all …]
D5x5s2p2-minmax-scalar-1x1-acc2.c:99 float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2() local
130 vi1x0 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
148 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
154 vi1x2 = vi1x4; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
199 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
231 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
D5x5s2p2-minmax-scalar-1x1-acc4.c:99 float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4() local
130 vi1x0 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
148 vo0p3 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
154 vi1x2 = vi1x4; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
201 vo0p3 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
235 vo0p3 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
D5x5s2p2-minmax-scalar-1x1.c:99 float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1() local
130 vi1x0 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
148 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
154 vi1x2 = vi1x4; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
198 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
229 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
D5x5s2p2-minmax-scalar-1x1-acc3.c:99 float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3() local
130 vi1x0 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
148 vo0p2 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
154 vi1x2 = vi1x4; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
200 vo0p2 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
233 vo0p2 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
D5x5s2p2-minmax-scalar-1x1-acc5.c:99 float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5() local
130 vi1x0 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
148 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
154 vi1x2 = vi1x4; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
202 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
237 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
D3x3p1-minmax-scalar-2x1-acc2.c:76 const float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2() local
100 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2()
105 vo1p0 += vi1x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2()
106 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2()
D3x3p1-minmax-scalar-2x1.c:76 const float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1() local
100 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1()
105 vo1p0 += vi1x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1()
106 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1()
D3x3p1-minmax-scalar-1x1-acc3.c:68 const float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3() local
84 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3()
88 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3()
D3x3p1-minmax-scalar-1x1-acc2.c:68 const float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc2() local
84 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc2()
88 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc2()
D3x3p1-minmax-scalar-1x1.c:68 const float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1() local
84 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1()
88 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1()
/external/XNNPACK/src/f32-conv-hwc2chw/
D3x3s2p1c3x4-wasmsimd-2x2.c:141 const v128_t vi1x2 = wasm_v128_load(i1); i1 += 4; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() local
155 …vo0x1 = wasm_f32x4_add(vo0x1, wasm_f32x4_mul(vk10c1, wasm_v32x4_shuffle(vi1x2, vi1x2, 0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
176 …vo0x1 = wasm_f32x4_add(vo0x1, wasm_f32x4_mul(vk10c2, wasm_v32x4_shuffle(vi1x2, vi1x2, 1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
197 …vo0x1 = wasm_f32x4_add(vo0x1, wasm_f32x4_mul(vk11c0, wasm_v32x4_shuffle(vi1x2, vi1x2, 2, 2, 2, 2))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
218 …vo0x1 = wasm_f32x4_add(vo0x1, wasm_f32x4_mul(vk11c1, wasm_v32x4_shuffle(vi1x2, vi1x2, 3, 3, 3, 3))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
286 …vo0x0 = wasm_f32x4_add(vo0x0, wasm_f32x4_mul(vk12c1, wasm_v32x4_shuffle(vi1x2, vi1x2, 0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
307 …vo0x0 = wasm_f32x4_add(vo0x0, wasm_f32x4_mul(vk12c2, wasm_v32x4_shuffle(vi1x2, vi1x2, 1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
394 v128_t vi1x2 = wasm_f32x4_const_splat(0.0f); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() local
401 vi1x2 = wasm_v128_load(i1 + 4); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
416 …vo0x1 = wasm_f32x4_add(vo0x1, wasm_f32x4_mul(vk10c1, wasm_v32x4_shuffle(vi1x2, vi1x2, 0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
[all …]
D3x3s2p1c3x4-sse-2x2.c:141 const __m128 vi1x2 = _mm_loadu_ps(i1); i1 += 4; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() local
155 …vo0x1 = _mm_add_ps(vo0x1, _mm_mul_ps(vk10c1, _mm_shuffle_ps(vi1x2, vi1x2, _MM_SHUFFLE(0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
176 …vo0x1 = _mm_add_ps(vo0x1, _mm_mul_ps(vk10c2, _mm_shuffle_ps(vi1x2, vi1x2, _MM_SHUFFLE(1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
197 …vo0x1 = _mm_add_ps(vo0x1, _mm_mul_ps(vk11c0, _mm_shuffle_ps(vi1x2, vi1x2, _MM_SHUFFLE(2, 2, 2, 2))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
218 …vo0x1 = _mm_add_ps(vo0x1, _mm_mul_ps(vk11c1, _mm_shuffle_ps(vi1x2, vi1x2, _MM_SHUFFLE(3, 3, 3, 3))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
286 …vo0x0 = _mm_add_ps(vo0x0, _mm_mul_ps(vk12c1, _mm_shuffle_ps(vi1x2, vi1x2, _MM_SHUFFLE(0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
307 …vo0x0 = _mm_add_ps(vo0x0, _mm_mul_ps(vk12c2, _mm_shuffle_ps(vi1x2, vi1x2, _MM_SHUFFLE(1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
395 __m128 vi1x2 = _mm_setzero_ps(); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() local
402 vi1x2 = _mm_loadu_ps(i1 + 4); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
417 …vo0x1 = _mm_add_ps(vo0x1, _mm_mul_ps(vk10c1, _mm_shuffle_ps(vi1x2, vi1x2, _MM_SHUFFLE(0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
[all …]
D3x3s2p1c3x4-neonfma-2x2.c:141 const float32x4_t vi1x2 = vld1q_f32(i1); i1 += 4; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() local
155 vo0x1 = vfmaq_laneq_f32(vo0x1, vk10c1, vi1x2, 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
176 vo0x1 = vfmaq_laneq_f32(vo0x1, vk10c2, vi1x2, 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
197 vo0x1 = vfmaq_laneq_f32(vo0x1, vk11c0, vi1x2, 2); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
218 vo0x1 = vfmaq_laneq_f32(vo0x1, vk11c1, vi1x2, 3); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
286 vo0x0 = vfmaq_laneq_f32(vo0x0, vk12c1, vi1x2, 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
307 vo0x0 = vfmaq_laneq_f32(vo0x0, vk12c2, vi1x2, 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
395 float32x4_t vi1x2 = vmovq_n_f32(0.0f); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() local
402 vi1x2 = vld1q_f32(i1 + 4); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
417 vo0x1 = vfmaq_laneq_f32(vo0x1, vk10c1, vi1x2, 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
[all …]
/external/XNNPACK/src/f32-prelu/gen/
Dwasm-2x4.c:59 float vi1x2 = i1[2]; in xnn_f32_prelu_ukernel__wasm_2x4() local
75 float vacc1x2 = __builtin_wasm_max_f32(vi1x2, vzero); in xnn_f32_prelu_ukernel__wasm_2x4()
76 vi1x2 = __builtin_wasm_min_f32(vi1x2, vzero); in xnn_f32_prelu_ukernel__wasm_2x4()
86 vacc1x2 += vi1x2 * vw2; in xnn_f32_prelu_ukernel__wasm_2x4()
Dscalar-2x4.c:58 const float vi1x2 = i1[2]; in xnn_f32_prelu_ukernel__scalar_2x4() local
68 const float vacc1x2 = XNN_UNPREDICTABLE(vi1x2 < 0.0f) ? vi1x2 * vw2 : vi1x2; in xnn_f32_prelu_ukernel__scalar_2x4()

Page: 1 2 3 4