/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 3x3p1-minmax-scalar-4x1.c | 78 float vi3x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() local 101 float vo3p0 = vbias + vi3x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() 104 vo2p0 += vi3x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() 107 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() 114 vi3x0 = vi3x1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() 173 float vo3p0 = vbias + vi3x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() 176 vo2p0 += vi3x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() 179 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
|
D | 3x3p1-minmax-scalar-5x1.c | 84 float vi3x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() local 110 float vo3p0 = vbias + vi3x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() 114 vo2p0 += vi3x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() 118 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() 126 vi3x0 = vi3x1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() 196 float vo3p0 = vbias + vi3x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() 200 vo2p0 += vi3x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() 204 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
|
D | 5x5p2-minmax-scalar-3x1.c | 93 float vi3x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local 138 vo2p0 += vi3x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 140 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 142 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 152 vi3x0 = vi3x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 265 vo2p0 += vi3x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 267 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 269 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 279 vi3x0 = vi3x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 370 vo2p0 += vi3x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() [all …]
|
D | 5x5p2-minmax-scalar-2x1.c | 87 float vi3x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() local 127 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 128 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 136 vi3x0 = vi3x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 222 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 223 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 231 vi3x0 = vi3x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 301 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 302 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
|
D | 3x3p1-minmax-scalar-3x1.c | 72 float vi3x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() local 94 vo2p0 += vi3x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() 96 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() 102 vi3x0 = vi3x1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() 152 vo2p0 += vi3x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() 154 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
|
D | 5x5p2-minmax-scalar-3x1-acc2.c | 93 float vi3x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() local 138 float vo2p1 = vi3x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 140 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 142 vo0p1 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 152 vi3x0 = vi3x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 268 float vo2p1 = vi3x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 270 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 272 vo0p1 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 282 vi3x0 = vi3x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 376 float vo2p1 = vi3x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() [all …]
|
D | 5x5p2-minmax-scalar-2x1-acc3.c | 87 float vi3x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() local 127 float vo1p2 = vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 128 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 136 vi3x0 = vi3x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 226 float vo1p2 = vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 227 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 235 vi3x0 = vi3x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 309 float vo1p2 = vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 310 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
|
D | 5x5p2-minmax-scalar-2x1-acc2.c | 87 float vi3x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() local 127 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 128 vo0p1 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 136 vi3x0 = vi3x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 224 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 225 vo0p1 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 233 vi3x0 = vi3x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 305 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 306 vo0p1 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
|
D | 5x5p2-minmax-scalar-1x1-acc4.c | 81 float vi3x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() local 114 float vo0p3 = vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() 120 vi3x0 = vi3x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() 180 float vo0p3 = vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() 186 vi3x0 = vi3x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() 236 float vo0p3 = vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
|
D | 5x5p2-minmax-scalar-1x1-acc3.c | 81 float vi3x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() local 114 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() 120 vi3x0 = vi3x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() 179 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() 185 vi3x0 = vi3x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() 234 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
|
D | 5x5p2-minmax-scalar-1x1.c | 81 float vi3x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() local 114 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() 120 vi3x0 = vi3x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() 177 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() 183 vi3x0 = vi3x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() 230 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
|
D | 5x5p2-minmax-scalar-1x1-acc2.c | 81 float vi3x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() local 114 vo0p1 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() 120 vi3x0 = vi3x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() 178 vo0p1 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() 184 vi3x0 = vi3x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() 232 vo0p1 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
|
D | 3x3p1-minmax-scalar-6x1.c | 90 float vi3x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() local 119 float vo3p0 = vbias + vi3x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() 124 vo2p0 += vi3x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() 129 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() 138 vi3x0 = vi3x1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() 219 float vo3p0 = vbias + vi3x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() 224 vo2p0 += vi3x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() 229 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
|
D | 5x5p2-minmax-scalar-1x1-acc5.c | 81 float vi3x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() local 114 float vo0p3 = vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() 120 vi3x0 = vi3x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() 181 float vo0p3 = vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() 187 vi3x0 = vi3x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() 238 float vo0p3 = vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
|
D | 5x5s2p2-minmax-scalar-2x1-acc3.c | 100 float vi3x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() local 149 float vo1p1 = vi3x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() 152 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() 160 vi3x0 = vi3x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() 251 float vo1p1 = vi3x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() 254 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() 309 float vo1p1 = vi3x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() 312 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
|
D | 5x5s2p2-minmax-scalar-2x1-acc2.c | 100 float vi3x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() local 149 float vo1p1 = vi3x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() 152 vo0p1 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() 160 vi3x0 = vi3x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() 249 float vo1p1 = vi3x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() 252 vo0p1 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() 305 float vo1p1 = vi3x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() 308 vo0p1 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
|
D | 5x5s2p2-minmax-scalar-2x1.c | 100 float vi3x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() local 149 vo1p0 += vi3x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() 152 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() 160 vi3x0 = vi3x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() 247 vo1p0 += vi3x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() 250 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() 301 vo1p0 += vi3x0 * vk10; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() 304 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
|
D | 3x3p1-minmax-scalar-2x1-acc2.c | 66 float vi3x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2() local 85 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2() 90 vi3x0 = vi3x1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2() 131 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2()
|
D | 3x3p1-minmax-scalar-2x1.c | 66 float vi3x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1() local 85 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1() 90 vi3x0 = vi3x1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1() 129 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1()
|
D | 5x5s2p2-minmax-scalar-1x1-acc4.c | 89 float vi3x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4() local 126 float vo0p3 = vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4() 132 vi3x0 = vi3x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4() 191 float vo0p3 = vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4() 225 float vo0p3 = vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
|
D | 5x5s2p2-minmax-scalar-1x1.c | 89 float vi3x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1() local 126 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1() 132 vi3x0 = vi3x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1() 188 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1() 219 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
|
D | 5x5s2p2-minmax-scalar-1x1-acc2.c | 89 float vi3x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2() local 126 vo0p1 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2() 132 vi3x0 = vi3x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2() 189 vo0p1 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2() 221 vo0p1 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
|
D | 5x5s2p2-minmax-scalar-1x1-acc3.c | 89 float vi3x0 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3() local 126 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3() 132 vi3x0 = vi3x2; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3() 190 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3() 223 vo0p0 += vi3x0 * vk30; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
|
/external/XNNPACK/src/f32-conv-hwc/gen/ |
D | 3x3s2p0p1c3x8-neonfma-2x1.c | 88 float32x4_t vi3x0 = vld1q_f32(i3); i3 += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() local 110 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 112 vo1c4567 = vfmaq_lane_f32(vo1c4567, vk10c0x4567, vget_low_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 134 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c1x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 136 vo1c4567 = vfmaq_lane_f32(vo1c4567, vk10c1x4567, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 158 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk10c2x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 160 vo1c4567 = vfmaq_lane_f32(vo1c4567, vk10c2x4567, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 182 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk11c0x0123, vget_high_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 184 vo1c4567 = vfmaq_lane_f32(vo1c4567, vk11c0x4567, vget_high_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 331 vi3x0 = vcombine_f32(vget_high_f32(vi3x1), vi3x2); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() [all …]
|
D | 3x3s2p0p1c3x8-neon-2x1.c | 86 float32x4_t vi3x0 = vld1q_f32(i3); i3 += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() local 108 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c0x0123, vget_low_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 110 vo1c4567 = vmlaq_lane_f32(vo1c4567, vk10c0x4567, vget_low_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 132 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c1x0123, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 134 vo1c4567 = vmlaq_lane_f32(vo1c4567, vk10c1x4567, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 156 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk10c2x0123, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 158 vo1c4567 = vmlaq_lane_f32(vo1c4567, vk10c2x4567, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 180 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk11c0x0123, vget_high_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 182 vo1c4567 = vmlaq_lane_f32(vo1c4567, vk11c0x4567, vget_high_f32(vi3x0), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 329 vi3x0 = vcombine_f32(vget_high_f32(vi3x1), vi3x2); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() [all …]
|