/external/XNNPACK/src/f32-conv-hwc/gen/ |
D | 3x3s2p1c3x4-neonfma-2x2.c | 247 const float32x4_t vk11c2x0123 = vld1q_f32(w + 68); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() local 249 vo0x0c0123 = vfmaq_lane_f32(vo0x0c0123, vk11c2x0123, vget_high_f32(vi1x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 250 vo1x0c0123 = vfmaq_lane_f32(vo1x0c0123, vk11c2x0123, vget_high_f32(vi3x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 252 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk11c2x0123, vget_low_f32(vi1x3), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 253 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk11c2x0123, vget_low_f32(vi3x3), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 486 const float32x4_t vk11c2x0123 = vld1q_f32(w + 68); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() local 488 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk11c2x0123, vget_high_f32(vi1x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 489 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk11c2x0123, vget_high_f32(vi3x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 677 const float32x4_t vk11c2x0123 = vld1q_f32(w + 68); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() local 679 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk11c2x0123, vget_high_f32(vi1x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() [all …]
|
D | 3x3s2p1c3x4-neon-2x2.c | 245 const float32x4_t vk11c2x0123 = vld1q_f32(w + 68); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() local 247 vo0x0c0123 = vmlaq_lane_f32(vo0x0c0123, vk11c2x0123, vget_high_f32(vi1x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 248 vo1x0c0123 = vmlaq_lane_f32(vo1x0c0123, vk11c2x0123, vget_high_f32(vi3x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 250 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk11c2x0123, vget_low_f32(vi1x3), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 251 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk11c2x0123, vget_low_f32(vi3x3), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 486 const float32x4_t vk11c2x0123 = vld1q_f32(w + 68); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() local 488 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk11c2x0123, vget_high_f32(vi1x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 489 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk11c2x0123, vget_high_f32(vi3x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 679 const float32x4_t vk11c2x0123 = vld1q_f32(w + 68); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() local 681 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk11c2x0123, vget_high_f32(vi1x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() [all …]
|
D | 3x3s2p0p1c3x4-neon-2x2.c | 238 const float32x4_t vk11c2x0123 = vld1q_f32(w + 68); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() local 240 vo0x0c0123 = vmlaq_lane_f32(vo0x0c0123, vk11c2x0123, vget_low_f32(vi1x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 241 vo1x0c0123 = vmlaq_lane_f32(vo1x0c0123, vk11c2x0123, vget_low_f32(vi3x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 243 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk11c2x0123, vget_high_f32(vi1x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 244 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk11c2x0123, vget_high_f32(vi3x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 486 const float32x4_t vk11c2x0123 = vld1q_f32(w + 68); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() local 488 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk11c2x0123, vget_low_f32(vi1x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 489 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk11c2x0123, vget_low_f32(vi3x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 679 const float32x4_t vk11c2x0123 = vld1q_f32(w + 68); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() local 681 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk11c2x0123, vi1x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() [all …]
|
D | 3x3s2p0p1c3x4-neonfma-2x2.c | 240 const float32x4_t vk11c2x0123 = vld1q_f32(w + 68); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() local 242 vo0x0c0123 = vfmaq_lane_f32(vo0x0c0123, vk11c2x0123, vget_low_f32(vi1x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 243 vo1x0c0123 = vfmaq_lane_f32(vo1x0c0123, vk11c2x0123, vget_low_f32(vi3x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 245 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk11c2x0123, vget_high_f32(vi1x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 246 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk11c2x0123, vget_high_f32(vi3x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 486 const float32x4_t vk11c2x0123 = vld1q_f32(w + 68); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() local 488 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk11c2x0123, vget_low_f32(vi1x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 489 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk11c2x0123, vget_low_f32(vi3x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 677 const float32x4_t vk11c2x0123 = vld1q_f32(w + 68); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() local 679 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk11c2x0123, vi1x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() [all …]
|
D | 3x3s2p0p1c3x4-neon-2x1.c | 181 const float32x4_t vk11c2x0123 = vld1q_f32(w + 68); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() local 183 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk11c2x0123, vget_low_f32(vi1x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 184 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk11c2x0123, vget_low_f32(vi3x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 375 const float32x4_t vk11c2x0123 = vld1q_f32(w + 68); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() local 377 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk11c2x0123, vi1x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 378 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk11c2x0123, vi3x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
|
D | 3x3s2p0p1c3x4-neonfma-2x1.c | 183 const float32x4_t vk11c2x0123 = vld1q_f32(w + 68); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() local 185 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk11c2x0123, vget_low_f32(vi1x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 186 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk11c2x0123, vget_low_f32(vi3x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 375 const float32x4_t vk11c2x0123 = vld1q_f32(w + 68); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() local 377 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk11c2x0123, vi1x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 378 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk11c2x0123, vi3x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
|
D | 3x3s2p1c3x4-neonfma-2x1.c | 183 const float32x4_t vk11c2x0123 = vld1q_f32(w + 68); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() local 185 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk11c2x0123, vget_high_f32(vi1x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 186 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk11c2x0123, vget_high_f32(vi3x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 375 const float32x4_t vk11c2x0123 = vld1q_f32(w + 68); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() local 377 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk11c2x0123, vget_high_f32(vi1x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 378 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk11c2x0123, vget_high_f32(vi3x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1()
|
D | 3x3s2p1c3x4-neon-2x1.c | 181 const float32x4_t vk11c2x0123 = vld1q_f32(w + 68); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() local 183 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk11c2x0123, vget_high_f32(vi1x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 184 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk11c2x0123, vget_high_f32(vi3x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 375 const float32x4_t vk11c2x0123 = vld1q_f32(w + 68); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() local 377 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk11c2x0123, vget_high_f32(vi1x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 378 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk11c2x0123, vget_high_f32(vi3x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1()
|
D | 3x3s2p1c3x8-neonfma-2x2.c | 331 const float32x4_t vk11c2x0123 = vld1q_f32(w + 136); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local 334 vo0x0c0123 = vfmaq_lane_f32(vo0x0c0123, vk11c2x0123, vget_high_f32(vi1x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 335 vo1x0c0123 = vfmaq_lane_f32(vo1x0c0123, vk11c2x0123, vget_high_f32(vi3x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 339 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk11c2x0123, vget_low_f32(vi1x3), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 340 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk11c2x0123, vget_low_f32(vi3x3), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 698 const float32x4_t vk11c2x0123 = vld1q_f32(w + 136); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local 701 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk11c2x0123, vget_high_f32(vi1x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 702 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk11c2x0123, vget_high_f32(vi3x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 984 const float32x4_t vk11c2x0123 = vld1q_f32(w + 136); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local 987 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk11c2x0123, vget_high_f32(vi1x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() [all …]
|
D | 3x3s2p1c3x8-neon-2x2.c | 329 const float32x4_t vk11c2x0123 = vld1q_f32(w + 136); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() local 332 vo0x0c0123 = vmlaq_lane_f32(vo0x0c0123, vk11c2x0123, vget_high_f32(vi1x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 333 vo1x0c0123 = vmlaq_lane_f32(vo1x0c0123, vk11c2x0123, vget_high_f32(vi3x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 337 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk11c2x0123, vget_low_f32(vi1x3), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 338 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk11c2x0123, vget_low_f32(vi3x3), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 698 const float32x4_t vk11c2x0123 = vld1q_f32(w + 136); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() local 701 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk11c2x0123, vget_high_f32(vi1x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 702 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk11c2x0123, vget_high_f32(vi3x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 986 const float32x4_t vk11c2x0123 = vld1q_f32(w + 136); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() local 989 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk11c2x0123, vget_high_f32(vi1x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() [all …]
|
D | 3x3s2p0p1c3x8-neonfma-2x2.c | 324 const float32x4_t vk11c2x0123 = vld1q_f32(w + 136); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() local 327 vo0x0c0123 = vfmaq_lane_f32(vo0x0c0123, vk11c2x0123, vget_low_f32(vi1x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 328 vo1x0c0123 = vfmaq_lane_f32(vo1x0c0123, vk11c2x0123, vget_low_f32(vi3x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 332 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk11c2x0123, vget_high_f32(vi1x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 333 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk11c2x0123, vget_high_f32(vi3x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 698 const float32x4_t vk11c2x0123 = vld1q_f32(w + 136); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() local 701 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk11c2x0123, vget_low_f32(vi1x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 702 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk11c2x0123, vget_low_f32(vi3x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 984 const float32x4_t vk11c2x0123 = vld1q_f32(w + 136); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() local 987 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk11c2x0123, vi1x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() [all …]
|
D | 3x3s2p0p1c3x8-neon-2x2.c | 322 const float32x4_t vk11c2x0123 = vld1q_f32(w + 136); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() local 325 vo0x0c0123 = vmlaq_lane_f32(vo0x0c0123, vk11c2x0123, vget_low_f32(vi1x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 326 vo1x0c0123 = vmlaq_lane_f32(vo1x0c0123, vk11c2x0123, vget_low_f32(vi3x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 330 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk11c2x0123, vget_high_f32(vi1x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 331 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk11c2x0123, vget_high_f32(vi3x2), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 698 const float32x4_t vk11c2x0123 = vld1q_f32(w + 136); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() local 701 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk11c2x0123, vget_low_f32(vi1x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 702 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk11c2x0123, vget_low_f32(vi3x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 986 const float32x4_t vk11c2x0123 = vld1q_f32(w + 136); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() local 989 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk11c2x0123, vi1x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() [all …]
|
D | 3x3s2p0p1c3x8-neonfma-2x1.c | 233 const float32x4_t vk11c2x0123 = vld1q_f32(w + 136); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() local 236 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk11c2x0123, vget_low_f32(vi1x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 237 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk11c2x0123, vget_low_f32(vi3x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 520 const float32x4_t vk11c2x0123 = vld1q_f32(w + 136); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() local 523 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk11c2x0123, vi1x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 524 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk11c2x0123, vi3x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
|
D | 3x3s2p1c3x8-neon-2x1.c | 231 const float32x4_t vk11c2x0123 = vld1q_f32(w + 136); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() local 234 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk11c2x0123, vget_high_f32(vi1x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 235 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk11c2x0123, vget_high_f32(vi3x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 520 const float32x4_t vk11c2x0123 = vld1q_f32(w + 136); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() local 523 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk11c2x0123, vget_high_f32(vi1x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 524 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk11c2x0123, vget_high_f32(vi3x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
|
D | 3x3s2p0p1c3x8-neon-2x1.c | 231 const float32x4_t vk11c2x0123 = vld1q_f32(w + 136); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() local 234 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk11c2x0123, vget_low_f32(vi1x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 235 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk11c2x0123, vget_low_f32(vi3x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 520 const float32x4_t vk11c2x0123 = vld1q_f32(w + 136); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() local 523 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk11c2x0123, vi1x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 524 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk11c2x0123, vi3x1, 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
|
D | 3x3s2p1c3x8-neonfma-2x1.c | 233 const float32x4_t vk11c2x0123 = vld1q_f32(w + 136); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() local 236 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk11c2x0123, vget_high_f32(vi1x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 237 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk11c2x0123, vget_high_f32(vi3x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 520 const float32x4_t vk11c2x0123 = vld1q_f32(w + 136); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() local 523 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk11c2x0123, vget_high_f32(vi1x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 524 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk11c2x0123, vget_high_f32(vi3x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
|
/external/XNNPACK/src/f32-conv-hwc2chw/ |
D | 3x3s2p1c3x4-sse-1x1.c | 145 const __m128 vk11c2x0123 = _mm_load_ps(w + 68); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1() local 147 voc0123 = _mm_add_ps(voc0123, _mm_mul_ps(vk11c2x0123, vi11c2)); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1() 272 const __m128 vk11c2x0123 = _mm_load_ps(w + 68); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1() local 274 voc0123 = _mm_add_ps(voc0123, _mm_mul_ps(vk11c2x0123, vi11c2)); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1()
|