Home
last modified time | relevance | path

Searched refs:vk21 (Results 1 – 25 of 237) sorted by relevance

12345678910

/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D3x3p1-minmax-scalar-6x1.c42 const float vk21 = weights[8]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() local
156 vo0p0 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
157 vo1p0 += vi3x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
158 vo2p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
159 vo3p0 += vi5x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
160 vo4p0 += vi6x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
161 vo5p0 += vi7x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
247 vo0p0 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
248 vo1p0 += vi3x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
249 vo2p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
[all …]
D3x3p1-minmax-scalar-5x1.c42 const float vk21 = weights[8]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() local
141 vo0p0 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
142 vo1p0 += vi3x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
143 vo2p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
144 vo3p0 += vi5x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
145 vo4p0 += vi6x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
219 vo0p0 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
220 vo1p0 += vi3x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
221 vo2p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
222 vo3p0 += vi5x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
[all …]
D3x3p1-minmax-scalar-4x1.c42 const float vk21 = weights[8]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() local
126 vo0p0 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
127 vo1p0 += vi3x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
128 vo2p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
129 vo3p0 += vi5x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
191 vo0p0 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
192 vo1p0 += vi3x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
193 vo2p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
194 vo3p0 += vi5x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
D3x3s2p1-minmax-scalar-4x1.c43 const float vk21 = weights[8]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1() local
156 vo0p0 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
157 vo1p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
158 vo2p0 += vi6x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
159 vo3p0 += vi8x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
234 vo0p0 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
235 vo1p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
236 vo2p0 += vi6x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
237 vo3p0 += vi8x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
D3x3p1-minmax-scalar-3x1.c42 const float vk21 = weights[8]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() local
111 vo0p0 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
112 vo1p0 += vi3x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
113 vo2p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
163 vo0p0 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
164 vo1p0 += vi3x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
165 vo2p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
D3x3s2p1-minmax-scalar-3x1.c43 const float vk21 = weights[8]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1() local
133 vo0p0 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1()
134 vo1p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1()
135 vo2p0 += vi6x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1()
195 vo0p0 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1()
196 vo1p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1()
197 vo2p0 += vi6x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1()
D3x3p1-minmax-scalar-2x1-acc2.c42 const float vk21 = weights[8]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2() local
96 vo0p1 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2()
97 vo1p1 += vi3x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2()
137 vo0p1 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2()
138 vo1p1 += vi3x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2()
D3x3p1-minmax-scalar-2x1.c42 const float vk21 = weights[8]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1() local
96 vo0p0 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1()
97 vo1p0 += vi3x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1()
135 vo0p0 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1()
136 vo1p0 += vi3x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1()
D3x3s2p1-minmax-scalar-2x1-acc2.c43 const float vk21 = weights[8]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2() local
110 vo0p1 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2()
111 vo1p1 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2()
158 vo0p1 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2()
159 vo1p1 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2()
D3x3s2p1-minmax-scalar-2x1.c43 const float vk21 = weights[8]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1() local
110 vo0p0 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1()
111 vo1p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1()
156 vo0p0 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1()
157 vo1p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1()
D5x5p2-minmax-scalar-3x1.c46 const float vk21 = weights[12]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local
163 vo0p0 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
164 vo1p0 += vi3x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
165 vo2p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
290 vo0p0 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
291 vo1p0 += vi3x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
292 vo2p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
387 vo0p0 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
388 vo1p0 += vi3x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
389 vo2p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
D5x5s2p2-minmax-scalar-3x1.c47 const float vk21 = weights[12]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() local
200 vo0p0 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
201 vo1p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
202 vo2p0 += vi6x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
324 vo0p0 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
325 vo1p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
326 vo2p0 += vi6x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
401 vo0p0 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
402 vo1p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
403 vo2p0 += vi6x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
D5x5s2p2-minmax-scalar-3x1-acc2.c47 const float vk21 = weights[12]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() local
200 vo0p1 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
201 vo1p1 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
202 vo2p1 += vi6x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
327 vo0p1 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
328 vo1p1 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
329 vo2p1 += vi6x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
407 vo0p1 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
408 vo1p1 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
409 vo2p1 += vi6x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
D5x5p2-minmax-scalar-3x1-acc2.c46 const float vk21 = weights[12]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() local
163 vo0p1 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
164 vo1p1 += vi3x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
165 vo2p1 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
293 vo0p1 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
294 vo1p1 += vi3x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
295 vo2p1 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
393 vo0p1 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
394 vo1p1 += vi3x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
395 vo2p1 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
D3x3p1-minmax-ssse3-6x4.c45 const __m128 vk21 = _mm_load1_ps(weights + 8); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() local
149 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi2x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
150 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi3x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
151 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi4x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
152 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi5x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
153 vo4p0 = _mm_add_ps(vo4p0, _mm_mul_ps(vi6x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
154 vo5p0 = _mm_add_ps(vo5p0, _mm_mul_ps(vi7x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
283 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi2x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
284 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi3x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
285 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi4x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
[all …]
D3x3p1-minmax-wasmsimd-x86-loadsplat-6x4.c50 const v128_t vk21 = wasm_v32x4_shuffle(vw89, vw89, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() local
154 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
155 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
156 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
157 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi5x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
158 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi6x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
159 vo5p0 = wasm_f32x4_add(vo5p0, wasm_f32x4_mul(vi7x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
287 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
288 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
289 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
[all …]
D3x3p1-minmax-wasmsimd-arm-loadsplat-6x4.c50 const v128_t vk21 = wasm_v32x4_shuffle(vw89, vw89, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4() local
154 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
155 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
156 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
157 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi5x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
158 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi6x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
159 vo5p0 = wasm_f32x4_add(vo5p0, wasm_f32x4_mul(vi7x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
287 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
288 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
289 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
[all …]
D3x3p1-minmax-wasmsimd-x86-loadsplat-5x4.c50 const v128_t vk21 = wasm_v32x4_shuffle(vw89, vw89, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4() local
141 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4()
142 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4()
143 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4()
144 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi5x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4()
145 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi6x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4()
256 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4()
257 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4()
258 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4()
259 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi5x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4()
[all …]
D3x3p1-minmax-wasmsimd-arm-loadsplat-5x4.c50 const v128_t vk21 = wasm_v32x4_shuffle(vw89, vw89, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() local
141 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
142 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
143 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
144 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi5x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
145 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi6x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
256 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
257 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
258 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
259 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi5x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
[all …]
D5x5p2-minmax-scalar-2x1.c46 const float vk21 = weights[12]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() local
144 vo0p0 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
145 vo1p0 += vi3x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
239 vo0p0 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
240 vo1p0 += vi3x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
311 vo0p0 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
312 vo1p0 += vi3x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
D3x3p1-minmax-sse-6x4.c45 const __m128 vk21 = _mm_load1_ps(weights + 8); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() local
182 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi2x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4()
183 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi3x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4()
184 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi4x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4()
185 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi5x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4()
186 vo4p0 = _mm_add_ps(vo4p0, _mm_mul_ps(vi6x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4()
187 vo5p0 = _mm_add_ps(vo5p0, _mm_mul_ps(vi7x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4()
366 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi2x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4()
367 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi3x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4()
368 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi4x4567, vk21)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4()
[all …]
D5x5s2p2-minmax-scalar-2x1-acc3.c47 const float vk21 = weights[12]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() local
169 vo0p1 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
170 vo1p1 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
263 vo0p1 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
264 vo1p1 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
321 vo0p1 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
322 vo1p1 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
D5x5p2-minmax-scalar-2x1-acc3.c46 const float vk21 = weights[12]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() local
144 vo0p1 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
145 vo1p1 += vi3x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
243 vo0p1 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
244 vo1p1 += vi3x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
319 vo0p1 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
320 vo1p1 += vi3x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
D5x5p2-minmax-scalar-2x1-acc2.c46 const float vk21 = weights[12]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() local
144 vo0p1 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
145 vo1p1 += vi3x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
241 vo0p1 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
242 vo1p1 += vi3x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
315 vo0p1 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
316 vo1p1 += vi3x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
D5x5s2p2-minmax-scalar-2x1.c47 const float vk21 = weights[12]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() local
169 vo0p0 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
170 vo1p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
259 vo0p0 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
260 vo1p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
313 vo0p0 += vi2x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
314 vo1p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()

12345678910