/external/XNNPACK/src/f32-conv-hwc/gen/ |
D | 3x3s2p1c3x4-neonfma-2x2.c | 303 const float32x4_t vk22c1x0123 = vld1q_f32(w + 96); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() local 305 vo0x0c0123 = vfmaq_lane_f32(vo0x0c0123, vk22c1x0123, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 306 vo1x0c0123 = vfmaq_lane_f32(vo1x0c0123, vk22c1x0123, vget_low_f32(vi4x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 308 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk22c1x0123, vget_high_f32(vi2x3), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 309 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk22c1x0123, vget_high_f32(vi4x3), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 528 const float32x4_t vk22c1x0123 = vld1q_f32(w + 96); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() local 530 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk22c1x0123, vi2x2, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 531 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk22c1x0123, vi4x2, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2()
|
D | 3x3s2p1c3x4-neon-2x2.c | 301 const float32x4_t vk22c1x0123 = vld1q_f32(w + 96); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() local 303 vo0x0c0123 = vmlaq_lane_f32(vo0x0c0123, vk22c1x0123, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 304 vo1x0c0123 = vmlaq_lane_f32(vo1x0c0123, vk22c1x0123, vget_low_f32(vi4x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 306 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk22c1x0123, vget_high_f32(vi2x3), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 307 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk22c1x0123, vget_high_f32(vi4x3), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 528 const float32x4_t vk22c1x0123 = vld1q_f32(w + 96); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() local 530 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk22c1x0123, vi2x2, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 531 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk22c1x0123, vi4x2, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2()
|
D | 3x3s2p0p1c3x4-neon-2x2.c | 301 const float32x4_t vk22c1x0123 = vld1q_f32(w + 96); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() local 303 vo0x0c0123 = vmlaq_lane_f32(vo0x0c0123, vk22c1x0123, vget_high_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 304 vo1x0c0123 = vmlaq_lane_f32(vo1x0c0123, vk22c1x0123, vget_high_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 306 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk22c1x0123, vget_low_f32(vi2x3), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 307 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk22c1x0123, vget_low_f32(vi4x3), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 521 const float32x4_t vk22c1x0123 = vld1q_f32(w + 96); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() local 523 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk22c1x0123, vget_high_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 524 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk22c1x0123, vget_high_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2()
|
D | 3x3s2p0p1c3x4-neonfma-2x2.c | 303 const float32x4_t vk22c1x0123 = vld1q_f32(w + 96); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() local 305 vo0x0c0123 = vfmaq_lane_f32(vo0x0c0123, vk22c1x0123, vget_high_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 306 vo1x0c0123 = vfmaq_lane_f32(vo1x0c0123, vk22c1x0123, vget_high_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 308 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk22c1x0123, vget_low_f32(vi2x3), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 309 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk22c1x0123, vget_low_f32(vi4x3), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 521 const float32x4_t vk22c1x0123 = vld1q_f32(w + 96); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() local 523 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk22c1x0123, vget_high_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 524 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk22c1x0123, vget_high_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2()
|
D | 3x3s2p0p1c3x4-neon-2x1.c | 216 const float32x4_t vk22c1x0123 = vld1q_f32(w + 96); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() local 218 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk22c1x0123, vget_high_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 219 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk22c1x0123, vget_high_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
|
D | 3x3s2p0p1c3x4-neonfma-2x1.c | 218 const float32x4_t vk22c1x0123 = vld1q_f32(w + 96); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() local 220 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk22c1x0123, vget_high_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 221 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk22c1x0123, vget_high_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
|
D | 3x3s2p1c3x4-neonfma-2x1.c | 225 const float32x4_t vk22c1x0123 = vld1q_f32(w + 96); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() local 227 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk22c1x0123, vi2x2, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 228 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk22c1x0123, vi4x2, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1()
|
D | 3x3s2p1c3x4-neon-2x1.c | 223 const float32x4_t vk22c1x0123 = vld1q_f32(w + 96); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() local 225 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk22c1x0123, vi2x2, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 226 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk22c1x0123, vi4x2, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1()
|
D | 3x3s2p1c3x8-neonfma-2x2.c | 422 const float32x4_t vk22c1x0123 = vld1q_f32(w + 192); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local 425 vo0x0c0123 = vfmaq_lane_f32(vo0x0c0123, vk22c1x0123, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 426 vo1x0c0123 = vfmaq_lane_f32(vo1x0c0123, vk22c1x0123, vget_low_f32(vi4x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 430 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk22c1x0123, vget_high_f32(vi2x3), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 431 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk22c1x0123, vget_high_f32(vi4x3), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 761 const float32x4_t vk22c1x0123 = vld1q_f32(w + 192); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() local 764 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk22c1x0123, vi2x2, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 765 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk22c1x0123, vi4x2, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2()
|
D | 3x3s2p1c3x8-neon-2x2.c | 420 const float32x4_t vk22c1x0123 = vld1q_f32(w + 192); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() local 423 vo0x0c0123 = vmlaq_lane_f32(vo0x0c0123, vk22c1x0123, vget_low_f32(vi2x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 424 vo1x0c0123 = vmlaq_lane_f32(vo1x0c0123, vk22c1x0123, vget_low_f32(vi4x2), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 428 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk22c1x0123, vget_high_f32(vi2x3), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 429 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk22c1x0123, vget_high_f32(vi4x3), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 761 const float32x4_t vk22c1x0123 = vld1q_f32(w + 192); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() local 764 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk22c1x0123, vi2x2, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 765 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk22c1x0123, vi4x2, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2()
|
D | 3x3s2p0p1c3x8-neonfma-2x2.c | 422 const float32x4_t vk22c1x0123 = vld1q_f32(w + 192); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() local 425 vo0x0c0123 = vfmaq_lane_f32(vo0x0c0123, vk22c1x0123, vget_high_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 426 vo1x0c0123 = vfmaq_lane_f32(vo1x0c0123, vk22c1x0123, vget_high_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 430 vo0x1c0123 = vfmaq_lane_f32(vo0x1c0123, vk22c1x0123, vget_low_f32(vi2x3), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 431 vo1x1c0123 = vfmaq_lane_f32(vo1x1c0123, vk22c1x0123, vget_low_f32(vi4x3), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 754 const float32x4_t vk22c1x0123 = vld1q_f32(w + 192); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() local 757 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk22c1x0123, vget_high_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 758 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk22c1x0123, vget_high_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2()
|
D | 3x3s2p0p1c3x8-neon-2x2.c | 420 const float32x4_t vk22c1x0123 = vld1q_f32(w + 192); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() local 423 vo0x0c0123 = vmlaq_lane_f32(vo0x0c0123, vk22c1x0123, vget_high_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 424 vo1x0c0123 = vmlaq_lane_f32(vo1x0c0123, vk22c1x0123, vget_high_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 428 vo0x1c0123 = vmlaq_lane_f32(vo0x1c0123, vk22c1x0123, vget_low_f32(vi2x3), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 429 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk22c1x0123, vget_low_f32(vi4x3), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 754 const float32x4_t vk22c1x0123 = vld1q_f32(w + 192); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() local 757 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk22c1x0123, vget_high_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 758 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk22c1x0123, vget_high_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2()
|
D | 3x3s2p0p1c3x8-neonfma-2x1.c | 289 const float32x4_t vk22c1x0123 = vld1q_f32(w + 192); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() local 292 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk22c1x0123, vget_high_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 293 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk22c1x0123, vget_high_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
|
D | 3x3s2p1c3x8-neon-2x1.c | 294 const float32x4_t vk22c1x0123 = vld1q_f32(w + 192); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() local 297 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk22c1x0123, vi2x2, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 298 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk22c1x0123, vi4x2, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
|
D | 3x3s2p0p1c3x8-neon-2x1.c | 287 const float32x4_t vk22c1x0123 = vld1q_f32(w + 192); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() local 290 vo0c0123 = vmlaq_lane_f32(vo0c0123, vk22c1x0123, vget_high_f32(vi2x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 291 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk22c1x0123, vget_high_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
|
D | 3x3s2p1c3x8-neonfma-2x1.c | 296 const float32x4_t vk22c1x0123 = vld1q_f32(w + 192); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() local 299 vo0c0123 = vfmaq_lane_f32(vo0c0123, vk22c1x0123, vi2x2, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 300 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk22c1x0123, vi4x2, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
|
/external/XNNPACK/src/f32-conv-hwc2chw/ |
D | 3x3s2p1c3x4-sse-1x1.c | 177 const __m128 vk22c1x0123 = _mm_load_ps(w + 96); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1() local 179 voc0123 = _mm_add_ps(voc0123, _mm_mul_ps(vk22c1x0123, vi22c1)); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1()
|