/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 5x5p2-minmax-scalar-3x1.c | 48 const float vk23 = weights[14]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local 211 vo0p0 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 212 vo1p0 += vi3x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 213 vo2p0 += vi4x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 338 vo0p0 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 339 vo1p0 += vi3x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 340 vo2p0 += vi4x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
|
D | 5x5s2p2-minmax-scalar-3x1.c | 49 const float vk23 = weights[14]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() local 252 vo0p0 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 253 vo1p0 += vi4x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 254 vo2p0 += vi6x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 356 vo0p0 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 357 vo1p0 += vi4x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 358 vo2p0 += vi6x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
|
D | 5x5s2p2-minmax-scalar-3x1-acc2.c | 49 const float vk23 = weights[14]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() local 252 vo0p1 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 253 vo1p1 += vi4x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 254 vo2p1 += vi6x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 359 vo0p1 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 360 vo1p1 += vi4x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 361 vo2p1 += vi6x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
|
D | 5x5p2-minmax-scalar-3x1-acc2.c | 48 const float vk23 = weights[14]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() local 211 vo0p1 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 212 vo1p1 += vi3x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 213 vo2p1 += vi4x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 341 vo0p1 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 342 vo1p1 += vi3x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 343 vo2p1 += vi4x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
|
D | 5x5p2-minmax-scalar-2x1.c | 48 const float vk23 = weights[14]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() local 180 vo0p0 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 181 vo1p0 += vi3x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 275 vo0p0 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 276 vo1p0 += vi3x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
|
D | 5x5s2p2-minmax-scalar-2x1-acc3.c | 49 const float vk23 = weights[14]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() local 207 vo0p2 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() 208 vo1p2 += vi4x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() 285 vo0p2 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() 286 vo1p2 += vi4x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
|
D | 5x5p2-minmax-scalar-2x1-acc3.c | 48 const float vk23 = weights[14]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() local 180 vo0p2 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 181 vo1p2 += vi3x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 279 vo0p2 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 280 vo1p2 += vi3x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
|
D | 5x5p2-minmax-scalar-2x1-acc2.c | 48 const float vk23 = weights[14]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() local 180 vo0p1 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 181 vo1p1 += vi3x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 277 vo0p1 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 278 vo1p1 += vi3x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
|
D | 5x5s2p2-minmax-scalar-2x1.c | 49 const float vk23 = weights[14]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() local 207 vo0p0 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() 208 vo1p0 += vi4x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() 281 vo0p0 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() 282 vo1p0 += vi4x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
|
D | 5x5s2p2-minmax-scalar-2x1-acc2.c | 49 const float vk23 = weights[14]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() local 207 vo0p1 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() 208 vo1p1 += vi4x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() 283 vo0p1 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() 284 vo1p1 += vi4x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
|
D | 5x5p2-minmax-scalar-1x1-acc4.c | 48 const float vk23 = weights[14]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() local 149 vo0p1 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() 215 vo0p1 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
|
D | 5x5s2p2-minmax-scalar-1x1-acc2.c | 49 const float vk23 = weights[14]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2() local 161 vo0p1 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2() 206 vo0p1 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
|
D | 5x5p2-minmax-scalar-1x1-acc3.c | 48 const float vk23 = weights[14]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() local 149 vo0p2 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() 214 vo0p2 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
|
D | 5x5p2-minmax-scalar-1x1.c | 48 const float vk23 = weights[14]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() local 149 vo0p0 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() 212 vo0p0 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
|
D | 5x5s2p2-minmax-scalar-1x1-acc4.c | 49 const float vk23 = weights[14]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4() local 161 vo0p1 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4() 208 vo0p1 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
|
D | 5x5s2p2-minmax-scalar-1x1.c | 49 const float vk23 = weights[14]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1() local 161 vo0p0 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1() 205 vo0p0 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
|
D | 5x5p2-minmax-scalar-1x1-acc2.c | 48 const float vk23 = weights[14]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() local 149 vo0p1 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() 213 vo0p1 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
|
D | 5x5s2p2-minmax-scalar-1x1-acc3.c | 49 const float vk23 = weights[14]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3() local 161 vo0p2 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3() 207 vo0p2 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
|
D | 5x5p2-minmax-wasmsimd-x86-loadsplat-5x4.c | 59 const v128_t vk23 = wasm_v32x4_shuffle(vwCDEF, vwCDEF, 2, 2, 2, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() local 294 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() 295 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() 296 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() 297 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi5x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() 298 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi6x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() 548 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() 549 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() 550 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() 551 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi5x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() [all …]
|
D | 5x5p2-minmax-wasmsimd-arm-loadsplat-5x4.c | 59 const v128_t vk23 = wasm_v32x4_shuffle(vwCDEF, vwCDEF, 2, 2, 2, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() local 294 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() 295 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() 296 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() 297 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi5x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() 298 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi6x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() 548 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() 549 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() 550 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() 551 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi5x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() [all …]
|
D | 5x5p2-minmax-scalar-1x1-acc5.c | 48 const float vk23 = weights[14]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() local 149 vo0p2 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() 216 vo0p2 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
|
D | 5x5s2p2-minmax-scalar-1x1-acc5.c | 49 const float vk23 = weights[14]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5() local 161 vo0p2 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5() 209 vo0p2 += vi2x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
|
D | 5x5p2-minmax-sse-5x4.c | 51 const __m128 vk23 = _mm_load1_ps(weights + 14); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local 313 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi2x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 314 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi3x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 315 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi4x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 316 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi5x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 317 vo4p0 = _mm_add_ps(vo4p0, _mm_mul_ps(vi6x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 566 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi2x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 567 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi3x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 568 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi4x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 569 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi5x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() [all …]
|
D | 5x5p2-minmax-wasmsimd-arm-loadsplat-4x4-acc2.c | 59 const v128_t vk23 = wasm_v32x4_shuffle(vwCDEF, vwCDEF, 2, 2, 2, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2() local 263 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi2x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2() 264 vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi3x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2() 265 vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi4x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2() 266 vo3p1 = wasm_f32x4_add(vo3p1, wasm_f32x4_mul(vi5x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2() 484 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi2x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2() 485 vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi3x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2() 486 vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi4x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2() 487 vo3p1 = wasm_f32x4_add(vo3p1, wasm_f32x4_mul(vi5x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2() 692 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi2x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2() [all …]
|
D | 5x5p2-minmax-wasmsimd-x86-loadsplat-4x4-acc2.c | 59 const v128_t vk23 = wasm_v32x4_shuffle(vwCDEF, vwCDEF, 2, 2, 2, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2() local 263 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi2x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2() 264 vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi3x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2() 265 vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi4x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2() 266 vo3p1 = wasm_f32x4_add(vo3p1, wasm_f32x4_mul(vi5x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2() 484 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi2x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2() 485 vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi3x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2() 486 vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi4x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2() 487 vo3p1 = wasm_f32x4_add(vo3p1, wasm_f32x4_mul(vi5x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2() 692 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi2x5678, vk23)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2() [all …]
|