/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 3x3p1-minmax-scalar-6x1.c | 34 const float vbias = weights[0]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() local 116 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() 117 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() 118 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() 119 float vo3p0 = vbias + vi3x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() 120 float vo4p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() 121 float vo5p0 = vbias + vi5x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() 216 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() 217 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() 218 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() [all …]
|
D | 3x3p1-minmax-scalar-5x1.c | 34 const float vbias = weights[0]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() local 107 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() 108 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() 109 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() 110 float vo3p0 = vbias + vi3x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() 111 float vo4p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() 193 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() 194 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() 195 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() 196 float vo3p0 = vbias + vi3x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() [all …]
|
D | 3x3p1-minmax-scalar-4x1.c | 34 const float vbias = weights[0]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() local 98 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() 99 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() 100 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() 101 float vo3p0 = vbias + vi3x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() 170 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() 171 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() 172 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() 173 float vo3p0 = vbias + vi3x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
|
D | 3x3s2p1-minmax-scalar-4x1.c | 35 const float vbias = weights[0]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1() local 116 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1() 117 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1() 118 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1() 119 float vo3p0 = vbias + vi6x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1() 213 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1() 214 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1() 215 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1() 216 float vo3p0 = vbias + vi6x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
|
D | 3x3p1-minmax-scalar-3x1.c | 34 const float vbias = weights[0]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() local 89 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() 90 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() 91 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() 147 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() 148 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() 149 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
|
D | 3x3s2p1-minmax-scalar-3x1.c | 35 const float vbias = weights[0]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1() local 102 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1() 103 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1() 104 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1() 179 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1() 180 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1() 181 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1()
|
D | 3x3p1-minmax-scalar-2x1-acc2.c | 34 const float vbias = weights[0]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2() local 80 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2() 81 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2() 126 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2() 127 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2()
|
D | 3x3p1-minmax-scalar-2x1.c | 34 const float vbias = weights[0]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1() local 80 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1() 81 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1() 124 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1() 125 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1()
|
D | 3x3s2p1-minmax-scalar-2x1-acc2.c | 35 const float vbias = weights[0]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2() local 88 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2() 89 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2() 147 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2() 148 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2()
|
D | 3x3s2p1-minmax-scalar-2x1.c | 35 const float vbias = weights[0]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1() local 88 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1() 89 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1() 145 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1() 146 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1()
|
D | 5x5p2-minmax-scalar-3x1.c | 34 const float vbias = weights[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local 133 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 134 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 135 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 260 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 261 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 262 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 365 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 366 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 367 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
|
D | 5x5p2-minmax-scalar-3x1-acc2.c | 34 const float vbias = weights[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() local 133 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 134 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 135 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 263 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 264 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 265 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 371 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 372 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 373 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
|
D | 5x5s2p2-minmax-scalar-3x1-acc2.c | 35 const float vbias = weights[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() local 168 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 169 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 170 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 305 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 306 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 307 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 385 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 386 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 387 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
|
D | 5x5s2p2-minmax-scalar-3x1.c | 35 const float vbias = weights[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() local 168 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 169 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 170 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 302 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 303 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 304 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 379 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 380 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 381 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
|
D | 5x5p2-minmax-scalar-2x1.c | 34 const float vbias = weights[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() local 122 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 123 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 217 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 218 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 296 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 297 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
|
D | 3x3p1-minmax-wasmsimd-arm-loadsplat-5x4.c | 42 const v128_t vbias = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() local 133 v128_t vo0p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi0x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() 134 v128_t vo1p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi1x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() 135 v128_t vo2p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi2x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() 136 v128_t vo3p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi3x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() 137 v128_t vo4p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() 248 v128_t vo0p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi0x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() 249 v128_t vo1p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi1x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() 250 v128_t vo2p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi2x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() 251 v128_t vo3p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi3x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() [all …]
|
D | 3x3p1-minmax-ssse3-6x4.c | 37 const __m128 vbias = _mm_load1_ps(weights); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() local 137 __m128 vo0p0 = _mm_add_ps(vbias, _mm_mul_ps(vi0x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() 138 __m128 vo1p0 = _mm_add_ps(vbias, _mm_mul_ps(vi1x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() 139 __m128 vo2p0 = _mm_add_ps(vbias, _mm_mul_ps(vi2x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() 140 __m128 vo3p0 = _mm_add_ps(vbias, _mm_mul_ps(vi3x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() 141 __m128 vo4p0 = _mm_add_ps(vbias, _mm_mul_ps(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() 142 __m128 vo5p0 = _mm_add_ps(vbias, _mm_mul_ps(vi5x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() 271 __m128 vo0p0 = _mm_add_ps(vbias, _mm_mul_ps(vi0x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() 272 __m128 vo1p0 = _mm_add_ps(vbias, _mm_mul_ps(vi1x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() 273 __m128 vo2p0 = _mm_add_ps(vbias, _mm_mul_ps(vi2x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() [all …]
|
D | 3x3p1-minmax-wasmsimd-x86-loadsplat-6x4.c | 42 const v128_t vbias = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() local 144 v128_t vo0p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi0x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() 145 v128_t vo1p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi1x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() 146 v128_t vo2p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi2x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() 147 v128_t vo3p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi3x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() 148 v128_t vo4p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() 149 v128_t vo5p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi5x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() 277 v128_t vo0p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi0x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() 278 v128_t vo1p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi1x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() 279 v128_t vo2p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi2x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() [all …]
|
/external/XNNPACK/src/f32-vmulcaddc/gen/ |
D | c8-minmax-wasmsimd-arm-2x.c | 95 const v128_t vbias = wasm_v128_load(w + 8); in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_arm_2x() local 97 vacc0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vscale, vacc0)); in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_arm_2x() 98 vacc1 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vscale, vacc1)); in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_arm_2x() 121 const v128_t vbias = wasm_v128_load(w + 8); in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_arm_2x() local 123 vacc0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vscale, vacc0)); in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_arm_2x() 124 vacc1 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vscale, vacc1)); in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_arm_2x()
|
D | c8-minmax-wasmsimd-x86-2x.c | 95 const v128_t vbias = wasm_v128_load(w + 8); in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_x86_2x() local 97 vacc0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vscale, vacc0)); in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_x86_2x() 98 vacc1 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vscale, vacc1)); in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_x86_2x() 121 const v128_t vbias = wasm_v128_load(w + 8); in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_x86_2x() local 123 vacc0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vscale, vacc0)); in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_x86_2x() 124 vacc1 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vscale, vacc1)); in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_x86_2x()
|
D | c1-minmax-scalar-2x.c | 53 const float vbias = w[1]; in xnn_f32_vmulcaddc_minmax_ukernel_c1__scalar_2x() local 55 vacc0 = vacc0 * vscale + vbias; in xnn_f32_vmulcaddc_minmax_ukernel_c1__scalar_2x() 56 vacc1 = vacc1 * vscale + vbias; in xnn_f32_vmulcaddc_minmax_ukernel_c1__scalar_2x()
|
D | c1-minmax-wasm-2x.c | 53 const float vbias = w[1]; in xnn_f32_vmulcaddc_minmax_ukernel_c1__wasm_2x() local 55 vacc0 = vacc0 * vscale + vbias; in xnn_f32_vmulcaddc_minmax_ukernel_c1__wasm_2x() 56 vacc1 = vacc1 * vscale + vbias; in xnn_f32_vmulcaddc_minmax_ukernel_c1__wasm_2x()
|
/external/XNNPACK/src/f32-vmulcaddc/ |
D | scalar.c.in | 66 const float vbias${ABC[C]} = w[${C + CHANNEL_TILE}]; 70 vacc${M}x${ABC[C]} = vacc${M}x${ABC[C]} * vscale${ABC[C]} + vbias${ABC[C]}; 94 const float vbias = w[${CHANNEL_TILE - 1}]; 97 vacc${M} = vacc${M} * vscale + vbias; 118 const float vbias = w[1]; 121 vacc${M} = vacc${M} * vscale + vbias;
|
/external/XNNPACK/src/qs8-gavgpool/gen/ |
D | 7x-minmax-sse41-c24-acc2.c | 56 const __m128i vbias = _mm_load_si128((const __m128i*) params->sse2.bias); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() local 113 const __m128i vacc0123 = _mm_add_epi32(vbias, _mm_cvtepi16_epi32(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() 114 …const __m128i vacc4567 = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vacc0x01234567, _mm_cmpgt_epi16(_… in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() 115 const __m128i vacc89AB = _mm_add_epi32(vbias, _mm_cvtepi16_epi32(vacc0x89ABCDEF)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() 116 …const __m128i vaccCDEF = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vacc0x89ABCDEF, _mm_cmpgt_epi16(_… in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() 117 const __m128i vaccGHIJ = _mm_add_epi32(vbias, _mm_cvtepi16_epi32(vacc0xGHIJKLMN)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() 118 …const __m128i vaccKLMN = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vacc0xGHIJKLMN, _mm_cmpgt_epi16(_… in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() 222 const __m128i vacc0123 = _mm_add_epi32(vbias, _mm_cvtepi16_epi32(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() 223 …const __m128i vacc4567 = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vacc0x01234567, _mm_cmpgt_epi16(_… in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
|
D | 7x-minmax-wasmsimd-c24-acc2.c | 56 const v128_t vbias = wasm_v128_load(params->wasmsimd.bias); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2() local 113 const v128_t vacc0123 = wasm_i32x4_add(vbias, wasm_i32x4_widen_low_i16x8(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2() 114 const v128_t vacc4567 = wasm_i32x4_add(vbias, wasm_i32x4_widen_high_i16x8(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2() 115 const v128_t vacc89AB = wasm_i32x4_add(vbias, wasm_i32x4_widen_low_i16x8(vacc0x89ABCDEF)); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2() 116 const v128_t vaccCDEF = wasm_i32x4_add(vbias, wasm_i32x4_widen_high_i16x8(vacc0x89ABCDEF)); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2() 117 const v128_t vaccGHIJ = wasm_i32x4_add(vbias, wasm_i32x4_widen_low_i16x8(vacc0xGHIJKLMN)); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2() 118 const v128_t vaccKLMN = wasm_i32x4_add(vbias, wasm_i32x4_widen_high_i16x8(vacc0xGHIJKLMN)); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2() 230 const v128_t vacc0123 = wasm_i32x4_add(vbias, wasm_i32x4_widen_low_i16x8(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2() 231 const v128_t vacc4567 = wasm_i32x4_add(vbias, wasm_i32x4_widen_high_i16x8(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
|