Home
last modified time | relevance | path

Searched refs:vi3x2345 (Results 1 – 25 of 91) sorted by relevance

1234

/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D5x5p2-minmax-neon-4x4-acc2.c181 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2() local
195 vo3p0 = vmlaq_lane_f32(vo3p0, vi3x2345, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
199 vo2p1 = vmlaq_lane_f32(vo2p1, vi3x2345, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
203 vo1p0 = vmlaq_lane_f32(vo1p0, vi3x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
207 vo0p1 = vmlaq_lane_f32(vo0p1, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
403 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2() local
417 vo3p0 = vmlaq_lane_f32(vo3p0, vi3x2345, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
421 vo2p1 = vmlaq_lane_f32(vo2p1, vi3x2345, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
425 vo1p0 = vmlaq_lane_f32(vo1p0, vi3x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
429 vo0p1 = vmlaq_lane_f32(vo0p1, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
[all …]
D5x5p2-minmax-neon-4x4.c181 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4() local
195 vo3p0 = vmlaq_lane_f32(vo3p0, vi3x2345, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
199 vo2p0 = vmlaq_lane_f32(vo2p0, vi3x2345, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
203 vo1p0 = vmlaq_lane_f32(vo1p0, vi3x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
207 vo0p0 = vmlaq_lane_f32(vo0p0, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
399 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4() local
413 vo3p0 = vmlaq_lane_f32(vo3p0, vi3x2345, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
417 vo2p0 = vmlaq_lane_f32(vo2p0, vi3x2345, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
421 vo1p0 = vmlaq_lane_f32(vo1p0, vi3x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
425 vo0p0 = vmlaq_lane_f32(vo0p0, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
[all …]
D5x5p2-minmax-neonfma-4x4.c181 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4() local
195 vo3p0 = vfmaq_lane_f32(vo3p0, vi3x2345, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
199 vo2p0 = vfmaq_lane_f32(vo2p0, vi3x2345, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
203 vo1p0 = vfmaq_lane_f32(vo1p0, vi3x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
207 vo0p0 = vfmaq_lane_f32(vo0p0, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
399 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4() local
413 vo3p0 = vfmaq_lane_f32(vo3p0, vi3x2345, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
417 vo2p0 = vfmaq_lane_f32(vo2p0, vi3x2345, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
421 vo1p0 = vfmaq_lane_f32(vo1p0, vi3x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
425 vo0p0 = vfmaq_lane_f32(vo0p0, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
[all …]
D5x5p2-minmax-neonfma-4x4-acc2.c181 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2() local
195 vo3p0 = vfmaq_lane_f32(vo3p0, vi3x2345, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()
199 vo2p1 = vfmaq_lane_f32(vo2p1, vi3x2345, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()
203 vo1p0 = vfmaq_lane_f32(vo1p0, vi3x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()
207 vo0p1 = vfmaq_lane_f32(vo0p1, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()
403 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2() local
417 vo3p0 = vfmaq_lane_f32(vo3p0, vi3x2345, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()
421 vo2p1 = vfmaq_lane_f32(vo2p1, vi3x2345, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()
425 vo1p0 = vfmaq_lane_f32(vo1p0, vi3x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()
429 vo0p1 = vfmaq_lane_f32(vo0p1, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()
[all …]
D5x5p2-minmax-neonfma-1x4-acc3.c118 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc3() local
129 vo0p0 = vfmaq_lane_f32(vo0p0, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc3()
227 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc3() local
238 vo0p0 = vfmaq_lane_f32(vo0p0, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc3()
330 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc3() local
339 vo0p0 = vfmaq_lane_f32(vo0p0, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc3()
D5x5p2-minmax-neon-1x4.c118 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4() local
129 vo0p0 = vmlaq_lane_f32(vo0p0, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4()
225 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4() local
236 vo0p0 = vmlaq_lane_f32(vo0p0, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4()
326 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4() local
335 vo0p0 = vmlaq_lane_f32(vo0p0, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4()
D5x5p2-minmax-neonfma-1x4-acc2.c118 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc2() local
129 vo0p1 = vfmaq_lane_f32(vo0p1, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc2()
226 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc2() local
237 vo0p1 = vfmaq_lane_f32(vo0p1, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc2()
328 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc2() local
337 vo0p1 = vfmaq_lane_f32(vo0p1, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc2()
D5x5p2-minmax-neon-1x4-acc2.c118 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc2() local
129 vo0p1 = vmlaq_lane_f32(vo0p1, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc2()
226 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc2() local
237 vo0p1 = vmlaq_lane_f32(vo0p1, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc2()
328 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc2() local
337 vo0p1 = vmlaq_lane_f32(vo0p1, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc2()
D5x5p2-minmax-neon-1x4-acc3.c118 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc3() local
129 vo0p0 = vmlaq_lane_f32(vo0p0, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc3()
227 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc3() local
238 vo0p0 = vmlaq_lane_f32(vo0p0, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc3()
330 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc3() local
339 vo0p0 = vmlaq_lane_f32(vo0p0, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc3()
D5x5p2-minmax-neonfma-1x4.c118 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4() local
129 vo0p0 = vfmaq_lane_f32(vo0p0, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4()
225 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4() local
236 vo0p0 = vfmaq_lane_f32(vo0p0, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4()
326 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4() local
335 vo0p0 = vfmaq_lane_f32(vo0p0, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4()
D5x5p2-minmax-neonfma-2x4-acc2.c139 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2() local
153 vo1p0 = vfmaq_lane_f32(vo1p0, vi3x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2()
155 vo0p1 = vfmaq_lane_f32(vo0p1, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2()
285 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2() local
299 vo1p0 = vfmaq_lane_f32(vo1p0, vi3x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2()
301 vo0p1 = vfmaq_lane_f32(vo0p1, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2()
424 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2() local
435 vo1p0 = vfmaq_lane_f32(vo1p0, vi3x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2()
437 vo0p1 = vfmaq_lane_f32(vo0p1, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2()
D5x5p2-minmax-neon-2x4.c139 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4() local
153 vo1p0 = vmlaq_lane_f32(vo1p0, vi3x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4()
155 vo0p0 = vmlaq_lane_f32(vo0p0, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4()
283 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4() local
297 vo1p0 = vmlaq_lane_f32(vo1p0, vi3x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4()
299 vo0p0 = vmlaq_lane_f32(vo0p0, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4()
420 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4() local
431 vo1p0 = vmlaq_lane_f32(vo1p0, vi3x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4()
433 vo0p0 = vmlaq_lane_f32(vo0p0, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4()
D5x5p2-minmax-neonfma-2x4.c139 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4() local
153 vo1p0 = vfmaq_lane_f32(vo1p0, vi3x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4()
155 vo0p0 = vfmaq_lane_f32(vo0p0, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4()
283 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4() local
297 vo1p0 = vfmaq_lane_f32(vo1p0, vi3x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4()
299 vo0p0 = vfmaq_lane_f32(vo0p0, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4()
420 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4() local
431 vo1p0 = vfmaq_lane_f32(vo1p0, vi3x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4()
433 vo0p0 = vfmaq_lane_f32(vo0p0, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4()
D5x5p2-minmax-neon-2x4-acc2.c139 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2() local
153 vo1p0 = vmlaq_lane_f32(vo1p0, vi3x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2()
155 vo0p1 = vmlaq_lane_f32(vo0p1, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2()
285 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2() local
299 vo1p0 = vmlaq_lane_f32(vo1p0, vi3x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2()
301 vo0p1 = vmlaq_lane_f32(vo0p1, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2()
424 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2() local
435 vo1p0 = vmlaq_lane_f32(vo1p0, vi3x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2()
437 vo0p1 = vmlaq_lane_f32(vo0p1, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2()
D5x5p2-minmax-neon-3x4.c160 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4() local
175 vo2p0 = vmlaq_lane_f32(vo2p0, vi3x2345, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4()
178 vo1p0 = vmlaq_lane_f32(vo1p0, vi3x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4()
181 vo0p0 = vmlaq_lane_f32(vo0p0, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4()
341 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4() local
356 vo2p0 = vmlaq_lane_f32(vo2p0, vi3x2345, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4()
359 vo1p0 = vmlaq_lane_f32(vo1p0, vi3x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4()
362 vo0p0 = vmlaq_lane_f32(vo0p0, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4()
514 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4() local
525 vo2p0 = vmlaq_lane_f32(vo2p0, vi3x2345, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4()
[all …]
D5x5p2-minmax-wasmsimd-arm-loadsplat-3x4-acc2.c189 const v128_t vi3x2345 = wasm_v32x4_shuffle(vi3x0123, vi3x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2() local
204 vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi3x2345, vk10)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
207 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x2345, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
210 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
372 const v128_t vi3x2345 = wasm_v32x4_shuffle(vi3x0123, vi3x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2() local
387 vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi3x2345, vk10)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
390 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x2345, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
393 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
547 const v128_t vi3x2345 = wasm_v32x4_shuffle(vi3x0123, vi3x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2() local
558 vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi3x2345, vk10)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
[all …]
D5x5p2-minmax-neonfma-3x4-acc2.c160 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2() local
175 vo2p1 = vfmaq_lane_f32(vo2p1, vi3x2345, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2()
178 vo1p0 = vfmaq_lane_f32(vo1p0, vi3x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2()
181 vo0p1 = vfmaq_lane_f32(vo0p1, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2()
344 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2() local
359 vo2p1 = vfmaq_lane_f32(vo2p1, vi3x2345, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2()
362 vo1p0 = vfmaq_lane_f32(vo1p0, vi3x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2()
365 vo0p1 = vfmaq_lane_f32(vo0p1, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2()
520 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2() local
531 vo2p1 = vfmaq_lane_f32(vo2p1, vi3x2345, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2()
[all …]
D5x5p2-minmax-wasmsimd-x86-loadsplat-3x4.c189 const v128_t vi3x2345 = wasm_v32x4_shuffle(vi3x0123, vi3x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4() local
204 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x2345, vk10)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
207 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x2345, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
210 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
369 const v128_t vi3x2345 = wasm_v32x4_shuffle(vi3x0123, vi3x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4() local
384 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x2345, vk10)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
387 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x2345, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
390 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
541 const v128_t vi3x2345 = wasm_v32x4_shuffle(vi3x0123, vi3x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4() local
552 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x2345, vk10)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
[all …]
D5x5p2-minmax-wasmsimd-arm-loadsplat-3x4.c189 const v128_t vi3x2345 = wasm_v32x4_shuffle(vi3x0123, vi3x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4() local
204 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x2345, vk10)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
207 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x2345, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
210 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
369 const v128_t vi3x2345 = wasm_v32x4_shuffle(vi3x0123, vi3x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4() local
384 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x2345, vk10)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
387 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x2345, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
390 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
541 const v128_t vi3x2345 = wasm_v32x4_shuffle(vi3x0123, vi3x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4() local
552 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x2345, vk10)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
[all …]
D5x5p2-minmax-neonfma-3x4.c160 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4() local
175 vo2p0 = vfmaq_lane_f32(vo2p0, vi3x2345, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4()
178 vo1p0 = vfmaq_lane_f32(vo1p0, vi3x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4()
181 vo0p0 = vfmaq_lane_f32(vo0p0, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4()
341 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4() local
356 vo2p0 = vfmaq_lane_f32(vo2p0, vi3x2345, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4()
359 vo1p0 = vfmaq_lane_f32(vo1p0, vi3x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4()
362 vo0p0 = vfmaq_lane_f32(vo0p0, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4()
514 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4() local
525 vo2p0 = vfmaq_lane_f32(vo2p0, vi3x2345, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4()
[all …]
D5x5p2-minmax-neon-3x4-acc2.c160 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2() local
175 vo2p1 = vmlaq_lane_f32(vo2p1, vi3x2345, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2()
178 vo1p0 = vmlaq_lane_f32(vo1p0, vi3x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2()
181 vo0p1 = vmlaq_lane_f32(vo0p1, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2()
344 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2() local
359 vo2p1 = vmlaq_lane_f32(vo2p1, vi3x2345, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2()
362 vo1p0 = vmlaq_lane_f32(vo1p0, vi3x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2()
365 vo0p1 = vmlaq_lane_f32(vo0p1, vi3x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2()
520 const float32x4_t vi3x2345 = vextq_f32(vi3x0123, vi3x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2() local
531 vo2p1 = vmlaq_lane_f32(vo2p1, vi3x2345, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2()
[all …]
D5x5p2-minmax-wasmsimd-arm-loadsplat-4x4-acc2.c210 const v128_t vi3x2345 = wasm_v32x4_shuffle(vi3x0123, vi3x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2() local
224 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi3x2345, vk00)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
228 vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi3x2345, vk10)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
232 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x2345, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
236 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
431 const v128_t vi3x2345 = wasm_v32x4_shuffle(vi3x0123, vi3x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2() local
445 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi3x2345, vk00)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
449 vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi3x2345, vk10)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
453 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x2345, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
457 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
[all …]
D5x5p2-minmax-wasmsimd-x86-loadsplat-4x4.c210 const v128_t vi3x2345 = wasm_v32x4_shuffle(vi3x0123, vi3x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4() local
224 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi3x2345, vk00)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4()
228 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x2345, vk10)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4()
232 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x2345, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4()
236 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4()
427 const v128_t vi3x2345 = wasm_v32x4_shuffle(vi3x0123, vi3x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4() local
441 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi3x2345, vk00)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4()
445 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x2345, vk10)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4()
449 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x2345, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4()
453 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4()
[all …]
D5x5p2-minmax-wasmsimd-x86-loadsplat-4x4-acc2.c210 const v128_t vi3x2345 = wasm_v32x4_shuffle(vi3x0123, vi3x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2() local
224 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi3x2345, vk00)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
228 vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi3x2345, vk10)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
232 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x2345, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
236 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
431 const v128_t vi3x2345 = wasm_v32x4_shuffle(vi3x0123, vi3x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2() local
445 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi3x2345, vk00)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
449 vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi3x2345, vk10)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
453 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x2345, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
457 vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
[all …]
D5x5p2-minmax-wasmsimd-arm-loadsplat-4x4.c210 const v128_t vi3x2345 = wasm_v32x4_shuffle(vi3x0123, vi3x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4() local
224 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi3x2345, vk00)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4()
228 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x2345, vk10)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4()
232 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x2345, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4()
236 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4()
427 const v128_t vi3x2345 = wasm_v32x4_shuffle(vi3x0123, vi3x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4() local
441 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi3x2345, vk00)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4()
445 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x2345, vk10)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4()
449 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x2345, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4()
453 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4()
[all …]

1234