/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 3x3p1-minmax-scalar-6x1.c | 39 const float vk11 = weights[5]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() local 150 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() 151 vo1p0 += vi2x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() 152 vo2p0 += vi3x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() 153 vo3p0 += vi4x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() 154 vo4p0 += vi5x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() 155 vo5p0 += vi6x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() 241 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() 242 vo1p0 += vi2x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() 243 vo2p0 += vi3x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() [all …]
|
D | 3x3p1-minmax-scalar-5x1.c | 39 const float vk11 = weights[5]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() local 136 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() 137 vo1p0 += vi2x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() 138 vo2p0 += vi3x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() 139 vo3p0 += vi4x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() 140 vo4p0 += vi5x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() 214 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() 215 vo1p0 += vi2x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() 216 vo2p0 += vi3x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() 217 vo3p0 += vi4x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() [all …]
|
D | 3x3p1-minmax-scalar-4x1.c | 39 const float vk11 = weights[5]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() local 122 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() 123 vo1p0 += vi2x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() 124 vo2p0 += vi3x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() 125 vo3p0 += vi4x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() 187 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() 188 vo1p0 += vi2x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() 189 vo2p0 += vi3x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() 190 vo3p0 += vi4x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
|
D | 3x3s2p1-minmax-scalar-4x1.c | 40 const float vk11 = weights[5]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1() local 152 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1() 153 vo1p0 += vi3x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1() 154 vo2p0 += vi5x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1() 155 vo3p0 += vi7x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1() 230 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1() 231 vo1p0 += vi3x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1() 232 vo2p0 += vi5x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1() 233 vo3p0 += vi7x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
|
D | 3x3p1-minmax-scalar-3x1.c | 39 const float vk11 = weights[5]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() local 108 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() 109 vo1p0 += vi2x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() 110 vo2p0 += vi3x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() 160 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() 161 vo1p0 += vi2x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() 162 vo2p0 += vi3x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
|
D | 3x3s2p1-minmax-scalar-3x1.c | 40 const float vk11 = weights[5]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1() local 130 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1() 131 vo1p0 += vi3x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1() 132 vo2p0 += vi5x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1() 192 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1() 193 vo1p0 += vi3x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1() 194 vo2p0 += vi5x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1()
|
D | 3x3p1-minmax-scalar-2x1-acc2.c | 39 const float vk11 = weights[5]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2() local 94 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2() 95 vo1p0 += vi2x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2() 135 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2() 136 vo1p0 += vi2x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2()
|
D | 3x3p1-minmax-scalar-2x1.c | 39 const float vk11 = weights[5]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1() local 94 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1() 95 vo1p0 += vi2x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1() 133 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1() 134 vo1p0 += vi2x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1()
|
D | 3x3s2p1-minmax-scalar-2x1-acc2.c | 40 const float vk11 = weights[5]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2() local 108 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2() 109 vo1p0 += vi3x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2() 156 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2() 157 vo1p0 += vi3x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2()
|
D | 3x3s2p1-minmax-scalar-2x1.c | 40 const float vk11 = weights[5]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1() local 108 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1() 109 vo1p0 += vi3x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1() 154 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1() 155 vo1p0 += vi3x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1()
|
D | 5x5p2-minmax-scalar-3x1.c | 41 const float vk11 = weights[7]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local 160 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 161 vo1p0 += vi2x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 162 vo2p0 += vi3x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 287 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 288 vo1p0 += vi2x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 289 vo2p0 += vi3x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 384 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 385 vo1p0 += vi2x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 386 vo2p0 += vi3x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
|
D | 5x5s2p2-minmax-scalar-3x1.c | 42 const float vk11 = weights[7]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() local 197 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 198 vo1p0 += vi3x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 199 vo2p0 += vi5x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 321 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 322 vo1p0 += vi3x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 323 vo2p0 += vi5x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 398 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 399 vo1p0 += vi3x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 400 vo2p0 += vi5x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
|
D | 5x5s2p2-minmax-scalar-3x1-acc2.c | 42 const float vk11 = weights[7]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() local 197 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 198 vo1p0 += vi3x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 199 vo2p0 += vi5x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 324 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 325 vo1p0 += vi3x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 326 vo2p0 += vi5x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 404 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 405 vo1p0 += vi3x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 406 vo2p0 += vi5x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
|
D | 5x5p2-minmax-scalar-3x1-acc2.c | 41 const float vk11 = weights[7]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() local 160 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 161 vo1p0 += vi2x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 162 vo2p0 += vi3x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 290 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 291 vo1p0 += vi2x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 292 vo2p0 += vi3x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 390 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 391 vo1p0 += vi2x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 392 vo2p0 += vi3x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
|
D | 3x3p1-minmax-ssse3-6x4.c | 42 const __m128 vk11 = _mm_load1_ps(weights + 5); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() local 143 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() 144 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi2x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() 145 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi3x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() 146 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi4x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() 147 vo4p0 = _mm_add_ps(vo4p0, _mm_mul_ps(vi5x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() 148 vo5p0 = _mm_add_ps(vo5p0, _mm_mul_ps(vi6x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() 277 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() 278 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi2x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() 279 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi3x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() [all …]
|
D | 3x3p1-minmax-wasmsimd-x86-loadsplat-6x4.c | 47 const v128_t vk11 = wasm_v32x4_shuffle(vw4567, vw4567, 1, 1, 1, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() local 148 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi1x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() 149 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() 150 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() 151 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi4x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() 152 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi5x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() 153 vo5p0 = wasm_f32x4_add(vo5p0, wasm_f32x4_mul(vi6x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() 281 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi1x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() 282 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() 283 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() [all …]
|
D | 3x3p1-minmax-wasmsimd-arm-loadsplat-6x4.c | 47 const v128_t vk11 = wasm_v32x4_shuffle(vw4567, vw4567, 1, 1, 1, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4() local 148 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi1x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4() 149 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4() 150 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4() 151 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi4x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4() 152 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi5x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4() 153 vo5p0 = wasm_f32x4_add(vo5p0, wasm_f32x4_mul(vi6x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4() 281 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi1x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4() 282 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4() 283 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4() [all …]
|
D | 3x3p1-minmax-wasmsimd-x86-loadsplat-5x4.c | 47 const v128_t vk11 = wasm_v32x4_shuffle(vw4567, vw4567, 1, 1, 1, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4() local 136 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi1x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4() 137 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4() 138 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4() 139 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi4x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4() 140 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi5x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4() 251 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi1x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4() 252 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4() 253 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4() 254 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi4x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4() [all …]
|
D | 3x3p1-minmax-wasmsimd-arm-loadsplat-5x4.c | 47 const v128_t vk11 = wasm_v32x4_shuffle(vw4567, vw4567, 1, 1, 1, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() local 136 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi1x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() 137 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() 138 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() 139 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi4x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() 140 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi5x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() 251 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi1x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() 252 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() 253 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() 254 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi4x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() [all …]
|
D | 5x5p2-minmax-scalar-2x1.c | 41 const float vk11 = weights[7]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() local 142 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 143 vo1p0 += vi2x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 237 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 238 vo1p0 += vi2x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 309 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 310 vo1p0 += vi2x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
|
D | 3x3p1-minmax-sse-6x4.c | 42 const __m128 vk11 = _mm_load1_ps(weights + 5); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() local 176 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() 177 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi2x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() 178 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi3x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() 179 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi4x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() 180 vo4p0 = _mm_add_ps(vo4p0, _mm_mul_ps(vi5x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() 181 vo5p0 = _mm_add_ps(vo5p0, _mm_mul_ps(vi6x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() 360 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() 361 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi2x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() 362 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi3x4567, vk11)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() [all …]
|
D | 5x5s2p2-minmax-scalar-2x1-acc3.c | 42 const float vk11 = weights[7]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() local 167 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() 168 vo1p0 += vi3x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() 261 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() 262 vo1p0 += vi3x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() 319 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() 320 vo1p0 += vi3x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
|
D | 5x5p2-minmax-scalar-2x1-acc3.c | 41 const float vk11 = weights[7]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() local 142 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 143 vo1p0 += vi2x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 241 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 242 vo1p0 += vi2x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 317 vo0p0 += vi1x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 318 vo1p0 += vi2x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
|
/external/deqp/external/vulkancts/framework/vulkan/generated/vulkansc/ |
D | vkDeviceProperties.inl | 39 propertyType.subgroupSize = allPropertiesBlobs.vk11.subgroupSize; 40 propertyType.supportedStages = allPropertiesBlobs.vk11.subgroupSupportedStages; 41 propertyType.supportedOperations = allPropertiesBlobs.vk11.subgroupSupportedOperations; 42 propertyType.quadOperationsInAllStages = allPropertiesBlobs.vk11.subgroupQuadOperationsInAllStages; 46 propertyType.pointClippingBehavior = allPropertiesBlobs.vk11.pointClippingBehavior; 50 propertyType.maxMultiviewViewCount = allPropertiesBlobs.vk11.maxMultiviewViewCount; 51 propertyType.maxMultiviewInstanceIndex = allPropertiesBlobs.vk11.maxMultiviewInstanceIndex; 55 propertyType.protectedNoFault = allPropertiesBlobs.vk11.protectedNoFault; 59 …memcpy(propertyType.deviceUUID, allPropertiesBlobs.vk11.deviceUUID, sizeof(uint8_t) * VK_UUID_SIZE… 60 …memcpy(propertyType.driverUUID, allPropertiesBlobs.vk11.driverUUID, sizeof(uint8_t) * VK_UUID_SIZE… [all …]
|
D | vkDeviceFeatures.inl | 62 featureType.storageBuffer16BitAccess = allFeaturesBlobs.vk11.storageBuffer16BitAccess; 63 …featureType.uniformAndStorageBuffer16BitAccess = allFeaturesBlobs.vk11.uniformAndStorageBuffer16Bi… 64 featureType.storagePushConstant16 = allFeaturesBlobs.vk11.storagePushConstant16; 65 featureType.storageInputOutput16 = allFeaturesBlobs.vk11.storageInputOutput16; 69 featureType.multiview = allFeaturesBlobs.vk11.multiview; 70 featureType.multiviewGeometryShader = allFeaturesBlobs.vk11.multiviewGeometryShader; 71 featureType.multiviewTessellationShader = allFeaturesBlobs.vk11.multiviewTessellationShader; 75 featureType.variablePointersStorageBuffer = allFeaturesBlobs.vk11.variablePointersStorageBuffer; 76 featureType.variablePointers = allFeaturesBlobs.vk11.variablePointers; 80 featureType.protectedMemory = allFeaturesBlobs.vk11.protectedMemory; [all …]
|