/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 5x5p2-minmax-scalar-3x1.c | 58 const float vk43 = weights[24]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local 217 vo0p0 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 218 vo1p0 += vi5x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 219 vo2p0 += vi6x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 344 vo0p0 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 345 vo1p0 += vi5x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 346 vo2p0 += vi6x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
|
D | 5x5s2p2-minmax-scalar-3x1.c | 59 const float vk43 = weights[24]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() local 258 vo0p0 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 259 vo1p0 += vi6x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 260 vo2p0 += vi8x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 362 vo0p0 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 363 vo1p0 += vi6x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 364 vo2p0 += vi8x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
|
D | 5x5s2p2-minmax-scalar-3x1-acc2.c | 59 const float vk43 = weights[24]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() local 258 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 259 vo1p1 += vi6x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 260 vo2p1 += vi8x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 365 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 366 vo1p1 += vi6x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 367 vo2p1 += vi8x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
|
D | 5x5p2-minmax-scalar-3x1-acc2.c | 58 const float vk43 = weights[24]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() local 217 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 218 vo1p1 += vi5x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 219 vo2p1 += vi6x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 347 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 348 vo1p1 += vi5x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 349 vo2p1 += vi6x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
|
D | 5x5p2-minmax-scalar-2x1.c | 58 const float vk43 = weights[24]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() local 184 vo0p0 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 185 vo1p0 += vi5x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 279 vo0p0 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 280 vo1p0 += vi5x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
|
D | 5x5s2p2-minmax-scalar-2x1-acc3.c | 59 const float vk43 = weights[24]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() local 211 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() 212 vo1p1 += vi6x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() 289 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() 290 vo1p1 += vi6x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
|
D | 5x5p2-minmax-scalar-2x1-acc3.c | 58 const float vk43 = weights[24]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() local 184 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 185 vo1p1 += vi5x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 283 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 284 vo1p1 += vi5x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
|
D | 5x5p2-minmax-scalar-2x1-acc2.c | 58 const float vk43 = weights[24]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() local 184 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 185 vo1p1 += vi5x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 281 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 282 vo1p1 += vi5x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
|
D | 5x5s2p2-minmax-scalar-2x1.c | 59 const float vk43 = weights[24]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() local 211 vo0p0 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() 212 vo1p0 += vi6x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() 285 vo0p0 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() 286 vo1p0 += vi6x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
|
D | 5x5s2p2-minmax-scalar-2x1-acc2.c | 59 const float vk43 = weights[24]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() local 211 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() 212 vo1p1 += vi6x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() 287 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() 288 vo1p1 += vi6x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
|
D | 5x5p2-minmax-scalar-1x1-acc4.c | 58 const float vk43 = weights[24]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() local 151 vo0p3 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() 217 vo0p3 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
|
D | 5x5s2p2-minmax-scalar-1x1-acc2.c | 59 const float vk43 = weights[24]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2() local 163 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2() 208 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
|
D | 5x5p2-minmax-scalar-1x1-acc3.c | 58 const float vk43 = weights[24]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() local 151 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() 216 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
|
D | 5x5p2-minmax-scalar-1x1.c | 58 const float vk43 = weights[24]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() local 151 vo0p0 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() 214 vo0p0 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
|
D | 5x5s2p2-minmax-scalar-1x1-acc4.c | 59 const float vk43 = weights[24]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4() local 163 vo0p3 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4() 210 vo0p3 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
|
D | 5x5s2p2-minmax-scalar-1x1.c | 59 const float vk43 = weights[24]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1() local 163 vo0p0 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1() 207 vo0p0 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
|
D | 5x5p2-minmax-scalar-1x1-acc2.c | 58 const float vk43 = weights[24]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() local 151 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() 215 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
|
D | 5x5s2p2-minmax-scalar-1x1-acc3.c | 59 const float vk43 = weights[24]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3() local 163 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3() 209 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
|
D | 5x5p2-minmax-wasmsimd-x86-loadsplat-5x4.c | 69 const v128_t vk43 = wasm_v32x4_shuffle(vwOP, vwOP, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() local 306 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() 307 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() 308 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi6x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() 309 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() 310 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi8x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() 560 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() 561 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() 562 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi6x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() 563 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() [all …]
|
D | 5x5p2-minmax-wasmsimd-arm-loadsplat-5x4.c | 69 const v128_t vk43 = wasm_v32x4_shuffle(vwOP, vwOP, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() local 306 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() 307 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() 308 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi6x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() 309 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() 310 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi8x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() 560 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() 561 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() 562 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi6x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() 563 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() [all …]
|
D | 5x5p2-minmax-scalar-1x1-acc5.c | 58 const float vk43 = weights[24]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() local 151 vo0p4 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() 218 vo0p4 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
|
D | 5x5s2p2-minmax-scalar-1x1-acc5.c | 59 const float vk43 = weights[24]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5() local 163 vo0p4 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5() 211 vo0p4 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
|
D | 5x5p2-minmax-sse-5x4.c | 61 const __m128 vk43 = _mm_load1_ps(weights + 24); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local 323 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi4x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 324 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi5x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 325 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi6x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 326 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi7x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 327 vo4p0 = _mm_add_ps(vo4p0, _mm_mul_ps(vi8x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 576 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi4x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 577 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi5x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 578 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi6x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 579 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi7x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() [all …]
|
D | 5x5p2-minmax-wasmsimd-arm-loadsplat-4x4-acc2.c | 69 const v128_t vk43 = wasm_v32x4_shuffle(vwOP, vwOP, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2() local 273 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi4x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2() 274 vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi5x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2() 275 vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi6x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2() 276 vo3p1 = wasm_f32x4_add(vo3p1, wasm_f32x4_mul(vi7x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2() 494 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi4x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2() 495 vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi5x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2() 496 vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi6x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2() 497 vo3p1 = wasm_f32x4_add(vo3p1, wasm_f32x4_mul(vi7x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2() 702 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi4x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2() [all …]
|
D | 5x5p2-minmax-wasmsimd-x86-loadsplat-4x4-acc2.c | 69 const v128_t vk43 = wasm_v32x4_shuffle(vwOP, vwOP, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2() local 273 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi4x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2() 274 vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi5x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2() 275 vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi6x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2() 276 vo3p1 = wasm_f32x4_add(vo3p1, wasm_f32x4_mul(vi7x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2() 494 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi4x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2() 495 vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi5x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2() 496 vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi6x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2() 497 vo3p1 = wasm_f32x4_add(vo3p1, wasm_f32x4_mul(vi7x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2() 702 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi4x5678, vk43)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2() [all …]
|