/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 5x5p2-minmax-scalar-2x1.c | 99 float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() local 152 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 159 vo1p0 += vi1x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 160 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 170 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 247 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 254 vo1p0 += vi1x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 255 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 265 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 319 vo1p0 += vi1x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() [all …]
|
D | 5x5p2-minmax-scalar-1x1-acc4.c | 91 float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() local 130 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() 136 vo0p3 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() 142 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() 196 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() 202 vo0p3 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() 208 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() 246 vo0p3 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
|
D | 5x5p2-minmax-scalar-1x1-acc3.c | 91 float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() local 130 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() 136 vo0p2 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() 142 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() 195 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() 201 vo0p2 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() 207 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() 244 vo0p2 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
|
D | 5x5p2-minmax-scalar-1x1.c | 91 float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() local 130 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() 136 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() 142 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() 193 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() 199 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() 205 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() 240 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
|
D | 5x5p2-minmax-scalar-1x1-acc2.c | 91 float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() local 130 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() 136 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() 142 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() 194 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() 200 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() 206 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() 242 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
|
D | 5x5p2-minmax-scalar-2x1-acc3.c | 99 float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() local 152 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 159 vo1p1 += vi1x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 160 vo0p2 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 170 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 251 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 258 vo1p1 += vi1x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 259 vo0p2 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 269 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 327 vo1p1 += vi1x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() [all …]
|
D | 5x5p2-minmax-scalar-2x1-acc2.c | 99 float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() local 152 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 159 vo1p0 += vi1x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 160 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 170 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 249 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 256 vo1p0 += vi1x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 257 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 267 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 323 vo1p0 += vi1x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() [all …]
|
D | 5x5p2-minmax-scalar-1x1-acc5.c | 91 float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() local 130 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() 136 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() 142 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() 197 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() 203 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() 209 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() 248 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
|
D | 5x5p2-minmax-scalar-3x1.c | 107 float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local 174 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 182 vo1p0 += vi1x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 184 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 198 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 301 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 309 vo1p0 += vi1x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 311 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 325 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 398 vo1p0 += vi1x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() [all …]
|
D | 5x5p2-minmax-scalar-3x1-acc2.c | 107 float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() local 174 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 182 vo1p0 += vi1x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 184 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 198 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 304 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 312 vo1p0 += vi1x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 314 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 328 vi1x2 = vi1x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 404 vo1p0 += vi1x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() [all …]
|
D | 5x5s2p2-minmax-scalar-1x1-acc2.c | 99 float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2() local 130 vi1x0 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2() 148 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2() 154 vi1x2 = vi1x4; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2() 199 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2() 231 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
|
D | 5x5s2p2-minmax-scalar-1x1-acc4.c | 99 float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4() local 130 vi1x0 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4() 148 vo0p3 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4() 154 vi1x2 = vi1x4; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4() 201 vo0p3 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4() 235 vo0p3 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
|
D | 5x5s2p2-minmax-scalar-1x1.c | 99 float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1() local 130 vi1x0 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1() 148 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1() 154 vi1x2 = vi1x4; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1() 198 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1() 229 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
|
D | 5x5s2p2-minmax-scalar-1x1-acc3.c | 99 float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3() local 130 vi1x0 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3() 148 vo0p2 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3() 154 vi1x2 = vi1x4; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3() 200 vo0p2 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3() 233 vo0p2 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
|
D | 5x5s2p2-minmax-scalar-1x1-acc5.c | 99 float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5() local 130 vi1x0 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5() 148 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5() 154 vi1x2 = vi1x4; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5() 202 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5() 237 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
|
D | 3x3p1-minmax-scalar-2x1-acc2.c | 76 const float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2() local 100 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2() 105 vo1p0 += vi1x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2() 106 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2()
|
D | 3x3p1-minmax-scalar-2x1.c | 76 const float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1() local 100 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1() 105 vo1p0 += vi1x2 * vk02; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1() 106 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1()
|
D | 3x3p1-minmax-scalar-1x1-acc3.c | 68 const float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3() local 84 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3() 88 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3()
|
D | 3x3p1-minmax-scalar-1x1-acc2.c | 68 const float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc2() local 84 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc2() 88 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc2()
|
D | 3x3p1-minmax-scalar-1x1.c | 68 const float vi1x2 = *i1++; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1() local 84 vi1x1 = vi1x2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1() 88 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1()
|
/external/XNNPACK/src/f32-conv-hwc2chw/ |
D | 3x3s2p1c3x4-wasmsimd-2x2.c | 141 const v128_t vi1x2 = wasm_v128_load(i1); i1 += 4; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() local 155 …vo0x1 = wasm_f32x4_add(vo0x1, wasm_f32x4_mul(vk10c1, wasm_v32x4_shuffle(vi1x2, vi1x2, 0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() 176 …vo0x1 = wasm_f32x4_add(vo0x1, wasm_f32x4_mul(vk10c2, wasm_v32x4_shuffle(vi1x2, vi1x2, 1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() 197 …vo0x1 = wasm_f32x4_add(vo0x1, wasm_f32x4_mul(vk11c0, wasm_v32x4_shuffle(vi1x2, vi1x2, 2, 2, 2, 2))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() 218 …vo0x1 = wasm_f32x4_add(vo0x1, wasm_f32x4_mul(vk11c1, wasm_v32x4_shuffle(vi1x2, vi1x2, 3, 3, 3, 3))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() 286 …vo0x0 = wasm_f32x4_add(vo0x0, wasm_f32x4_mul(vk12c1, wasm_v32x4_shuffle(vi1x2, vi1x2, 0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() 307 …vo0x0 = wasm_f32x4_add(vo0x0, wasm_f32x4_mul(vk12c2, wasm_v32x4_shuffle(vi1x2, vi1x2, 1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() 394 v128_t vi1x2 = wasm_f32x4_const_splat(0.0f); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() local 401 vi1x2 = wasm_v128_load(i1 + 4); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() 416 …vo0x1 = wasm_f32x4_add(vo0x1, wasm_f32x4_mul(vk10c1, wasm_v32x4_shuffle(vi1x2, vi1x2, 0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() [all …]
|
D | 3x3s2p1c3x4-sse-2x2.c | 141 const __m128 vi1x2 = _mm_loadu_ps(i1); i1 += 4; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() local 155 …vo0x1 = _mm_add_ps(vo0x1, _mm_mul_ps(vk10c1, _mm_shuffle_ps(vi1x2, vi1x2, _MM_SHUFFLE(0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() 176 …vo0x1 = _mm_add_ps(vo0x1, _mm_mul_ps(vk10c2, _mm_shuffle_ps(vi1x2, vi1x2, _MM_SHUFFLE(1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() 197 …vo0x1 = _mm_add_ps(vo0x1, _mm_mul_ps(vk11c0, _mm_shuffle_ps(vi1x2, vi1x2, _MM_SHUFFLE(2, 2, 2, 2))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() 218 …vo0x1 = _mm_add_ps(vo0x1, _mm_mul_ps(vk11c1, _mm_shuffle_ps(vi1x2, vi1x2, _MM_SHUFFLE(3, 3, 3, 3))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() 286 …vo0x0 = _mm_add_ps(vo0x0, _mm_mul_ps(vk12c1, _mm_shuffle_ps(vi1x2, vi1x2, _MM_SHUFFLE(0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() 307 …vo0x0 = _mm_add_ps(vo0x0, _mm_mul_ps(vk12c2, _mm_shuffle_ps(vi1x2, vi1x2, _MM_SHUFFLE(1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() 395 __m128 vi1x2 = _mm_setzero_ps(); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() local 402 vi1x2 = _mm_loadu_ps(i1 + 4); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() 417 …vo0x1 = _mm_add_ps(vo0x1, _mm_mul_ps(vk10c1, _mm_shuffle_ps(vi1x2, vi1x2, _MM_SHUFFLE(0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() [all …]
|
D | 3x3s2p1c3x4-neonfma-2x2.c | 141 const float32x4_t vi1x2 = vld1q_f32(i1); i1 += 4; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() local 155 vo0x1 = vfmaq_laneq_f32(vo0x1, vk10c1, vi1x2, 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() 176 vo0x1 = vfmaq_laneq_f32(vo0x1, vk10c2, vi1x2, 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() 197 vo0x1 = vfmaq_laneq_f32(vo0x1, vk11c0, vi1x2, 2); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() 218 vo0x1 = vfmaq_laneq_f32(vo0x1, vk11c1, vi1x2, 3); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() 286 vo0x0 = vfmaq_laneq_f32(vo0x0, vk12c1, vi1x2, 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() 307 vo0x0 = vfmaq_laneq_f32(vo0x0, vk12c2, vi1x2, 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() 395 float32x4_t vi1x2 = vmovq_n_f32(0.0f); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() local 402 vi1x2 = vld1q_f32(i1 + 4); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() 417 vo0x1 = vfmaq_laneq_f32(vo0x1, vk10c1, vi1x2, 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() [all …]
|
/external/XNNPACK/src/f32-prelu/gen/ |
D | wasm-2x4.c | 59 float vi1x2 = i1[2]; in xnn_f32_prelu_ukernel__wasm_2x4() local 75 float vacc1x2 = __builtin_wasm_max_f32(vi1x2, vzero); in xnn_f32_prelu_ukernel__wasm_2x4() 76 vi1x2 = __builtin_wasm_min_f32(vi1x2, vzero); in xnn_f32_prelu_ukernel__wasm_2x4() 86 vacc1x2 += vi1x2 * vw2; in xnn_f32_prelu_ukernel__wasm_2x4()
|
D | scalar-2x4.c | 58 const float vi1x2 = i1[2]; in xnn_f32_prelu_ukernel__scalar_2x4() local 68 const float vacc1x2 = XNN_UNPREDICTABLE(vi1x2 < 0.0f) ? vi1x2 * vw2 : vi1x2; in xnn_f32_prelu_ukernel__scalar_2x4()
|