Home
last modified time | relevance | path

Searched refs:vi4x2345 (Results 1 – 25 of 91) sorted by relevance

1234

/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D5x5p2-minmax-neon-4x4-acc2.c183 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2() local
200 vo3p1 = vmlaq_lane_f32(vo3p1, vi4x2345, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
204 vo2p0 = vmlaq_lane_f32(vo2p0, vi4x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
208 vo1p1 = vmlaq_lane_f32(vo1p1, vi4x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
212 vo0p0 = vmlaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
405 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2() local
422 vo3p1 = vmlaq_lane_f32(vo3p1, vi4x2345, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
426 vo2p0 = vmlaq_lane_f32(vo2p0, vi4x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
430 vo1p1 = vmlaq_lane_f32(vo1p1, vi4x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
434 vo0p0 = vmlaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
[all …]
D5x5p2-minmax-neon-4x4.c183 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4() local
200 vo3p0 = vmlaq_lane_f32(vo3p0, vi4x2345, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
204 vo2p0 = vmlaq_lane_f32(vo2p0, vi4x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
208 vo1p0 = vmlaq_lane_f32(vo1p0, vi4x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
212 vo0p0 = vmlaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
401 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4() local
418 vo3p0 = vmlaq_lane_f32(vo3p0, vi4x2345, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
422 vo2p0 = vmlaq_lane_f32(vo2p0, vi4x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
426 vo1p0 = vmlaq_lane_f32(vo1p0, vi4x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
430 vo0p0 = vmlaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
[all …]
D5x5p2-minmax-neonfma-4x4.c183 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4() local
200 vo3p0 = vfmaq_lane_f32(vo3p0, vi4x2345, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
204 vo2p0 = vfmaq_lane_f32(vo2p0, vi4x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
208 vo1p0 = vfmaq_lane_f32(vo1p0, vi4x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
212 vo0p0 = vfmaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
401 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4() local
418 vo3p0 = vfmaq_lane_f32(vo3p0, vi4x2345, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
422 vo2p0 = vfmaq_lane_f32(vo2p0, vi4x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
426 vo1p0 = vfmaq_lane_f32(vo1p0, vi4x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
430 vo0p0 = vfmaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
[all …]
D5x5p2-minmax-neonfma-4x4-acc2.c183 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2() local
200 vo3p1 = vfmaq_lane_f32(vo3p1, vi4x2345, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()
204 vo2p0 = vfmaq_lane_f32(vo2p0, vi4x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()
208 vo1p1 = vfmaq_lane_f32(vo1p1, vi4x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()
212 vo0p0 = vfmaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()
405 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2() local
422 vo3p1 = vfmaq_lane_f32(vo3p1, vi4x2345, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()
426 vo2p0 = vfmaq_lane_f32(vo2p0, vi4x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()
430 vo1p1 = vfmaq_lane_f32(vo1p1, vi4x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()
434 vo0p0 = vfmaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()
[all …]
D5x5p2-minmax-neonfma-1x4-acc3.c120 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc3() local
131 vo0p1 = vfmaq_lane_f32(vo0p1, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc3()
229 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc3() local
240 vo0p1 = vfmaq_lane_f32(vo0p1, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc3()
331 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc3() local
341 vo0p1 = vfmaq_lane_f32(vo0p1, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc3()
D5x5p2-minmax-neon-1x4.c120 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4() local
131 vo0p0 = vmlaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4()
227 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4() local
238 vo0p0 = vmlaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4()
327 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4() local
337 vo0p0 = vmlaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4()
D5x5p2-minmax-neonfma-1x4-acc2.c120 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc2() local
131 vo0p0 = vfmaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc2()
228 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc2() local
239 vo0p0 = vfmaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc2()
329 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc2() local
339 vo0p0 = vfmaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4_acc2()
D5x5p2-minmax-neon-1x4-acc2.c120 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc2() local
131 vo0p0 = vmlaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc2()
228 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc2() local
239 vo0p0 = vmlaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc2()
329 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc2() local
339 vo0p0 = vmlaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc2()
D5x5p2-minmax-neon-1x4-acc3.c120 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc3() local
131 vo0p1 = vmlaq_lane_f32(vo0p1, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc3()
229 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc3() local
240 vo0p1 = vmlaq_lane_f32(vo0p1, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc3()
331 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc3() local
341 vo0p1 = vmlaq_lane_f32(vo0p1, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_1x4_acc3()
D5x5p2-minmax-neonfma-1x4.c120 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4() local
131 vo0p0 = vfmaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4()
227 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4() local
238 vo0p0 = vfmaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4()
327 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4() local
337 vo0p0 = vfmaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_1x4()
D5x5p2-minmax-neonfma-2x4-acc2.c141 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2() local
156 vo1p1 = vfmaq_lane_f32(vo1p1, vi4x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2()
158 vo0p0 = vfmaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2()
287 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2() local
302 vo1p1 = vfmaq_lane_f32(vo1p1, vi4x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2()
304 vo0p0 = vfmaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2()
425 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2() local
438 vo1p1 = vfmaq_lane_f32(vo1p1, vi4x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2()
440 vo0p0 = vfmaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4_acc2()
D5x5p2-minmax-neon-2x4.c141 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4() local
156 vo1p0 = vmlaq_lane_f32(vo1p0, vi4x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4()
158 vo0p0 = vmlaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4()
285 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4() local
300 vo1p0 = vmlaq_lane_f32(vo1p0, vi4x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4()
302 vo0p0 = vmlaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4()
421 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4() local
434 vo1p0 = vmlaq_lane_f32(vo1p0, vi4x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4()
436 vo0p0 = vmlaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4()
D5x5p2-minmax-neonfma-2x4.c141 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4() local
156 vo1p0 = vfmaq_lane_f32(vo1p0, vi4x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4()
158 vo0p0 = vfmaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4()
285 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4() local
300 vo1p0 = vfmaq_lane_f32(vo1p0, vi4x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4()
302 vo0p0 = vfmaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4()
421 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4() local
434 vo1p0 = vfmaq_lane_f32(vo1p0, vi4x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4()
436 vo0p0 = vfmaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_2x4()
D5x5p2-minmax-neon-2x4-acc2.c141 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2() local
156 vo1p1 = vmlaq_lane_f32(vo1p1, vi4x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2()
158 vo0p0 = vmlaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2()
287 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2() local
302 vo1p1 = vmlaq_lane_f32(vo1p1, vi4x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2()
304 vo0p0 = vmlaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2()
425 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2() local
438 vo1p1 = vmlaq_lane_f32(vo1p1, vi4x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2()
440 vo0p0 = vmlaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_2x4_acc2()
D5x5p2-minmax-neon-3x4.c162 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4() local
179 vo2p0 = vmlaq_lane_f32(vo2p0, vi4x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4()
182 vo1p0 = vmlaq_lane_f32(vo1p0, vi4x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4()
185 vo0p0 = vmlaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4()
343 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4() local
360 vo2p0 = vmlaq_lane_f32(vo2p0, vi4x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4()
363 vo1p0 = vmlaq_lane_f32(vo1p0, vi4x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4()
366 vo0p0 = vmlaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4()
515 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4() local
529 vo2p0 = vmlaq_lane_f32(vo2p0, vi4x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4()
[all …]
D5x5p2-minmax-wasmsimd-arm-loadsplat-3x4-acc2.c191 const v128_t vi4x2345 = wasm_v32x4_shuffle(vi4x0123, vi4x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2() local
208 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x2345, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
211 vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi4x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
214 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
374 const v128_t vi4x2345 = wasm_v32x4_shuffle(vi4x0123, vi4x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2() local
391 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x2345, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
394 vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi4x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
397 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
548 const v128_t vi4x2345 = wasm_v32x4_shuffle(vi4x0123, vi4x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2() local
562 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x2345, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4_acc2()
[all …]
D5x5p2-minmax-neonfma-3x4-acc2.c162 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2() local
179 vo2p0 = vfmaq_lane_f32(vo2p0, vi4x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2()
182 vo1p1 = vfmaq_lane_f32(vo1p1, vi4x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2()
185 vo0p0 = vfmaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2()
346 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2() local
363 vo2p0 = vfmaq_lane_f32(vo2p0, vi4x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2()
366 vo1p1 = vfmaq_lane_f32(vo1p1, vi4x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2()
369 vo0p0 = vfmaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2()
521 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2() local
535 vo2p0 = vfmaq_lane_f32(vo2p0, vi4x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4_acc2()
[all …]
D5x5p2-minmax-wasmsimd-x86-loadsplat-3x4.c191 const v128_t vi4x2345 = wasm_v32x4_shuffle(vi4x0123, vi4x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4() local
208 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x2345, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
211 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
214 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
371 const v128_t vi4x2345 = wasm_v32x4_shuffle(vi4x0123, vi4x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4() local
388 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x2345, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
391 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
394 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
542 const v128_t vi4x2345 = wasm_v32x4_shuffle(vi4x0123, vi4x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4() local
556 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x2345, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_3x4()
[all …]
D5x5p2-minmax-wasmsimd-arm-loadsplat-3x4.c191 const v128_t vi4x2345 = wasm_v32x4_shuffle(vi4x0123, vi4x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4() local
208 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x2345, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
211 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
214 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
371 const v128_t vi4x2345 = wasm_v32x4_shuffle(vi4x0123, vi4x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4() local
388 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x2345, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
391 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
394 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
542 const v128_t vi4x2345 = wasm_v32x4_shuffle(vi4x0123, vi4x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4() local
556 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x2345, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_3x4()
[all …]
D5x5p2-minmax-neonfma-3x4.c162 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4() local
179 vo2p0 = vfmaq_lane_f32(vo2p0, vi4x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4()
182 vo1p0 = vfmaq_lane_f32(vo1p0, vi4x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4()
185 vo0p0 = vfmaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4()
343 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4() local
360 vo2p0 = vfmaq_lane_f32(vo2p0, vi4x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4()
363 vo1p0 = vfmaq_lane_f32(vo1p0, vi4x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4()
366 vo0p0 = vfmaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4()
515 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4() local
529 vo2p0 = vfmaq_lane_f32(vo2p0, vi4x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4()
[all …]
D5x5p2-minmax-neon-3x4-acc2.c162 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2() local
179 vo2p0 = vmlaq_lane_f32(vo2p0, vi4x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2()
182 vo1p1 = vmlaq_lane_f32(vo1p1, vi4x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2()
185 vo0p0 = vmlaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2()
346 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2() local
363 vo2p0 = vmlaq_lane_f32(vo2p0, vi4x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2()
366 vo1p1 = vmlaq_lane_f32(vo1p1, vi4x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2()
369 vo0p0 = vmlaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2()
521 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2() local
535 vo2p0 = vmlaq_lane_f32(vo2p0, vi4x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_3x4_acc2()
[all …]
D5x5p2-minmax-neonfma-5x4.c204 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4() local
219 vo4p0 = vfmaq_lane_f32(vo4p0, vi4x2345, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4()
224 vo3p0 = vfmaq_lane_f32(vo3p0, vi4x2345, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4()
229 vo2p0 = vfmaq_lane_f32(vo2p0, vi4x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4()
234 vo1p0 = vfmaq_lane_f32(vo1p0, vi4x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4()
239 vo0p0 = vfmaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4()
459 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4() local
474 vo4p0 = vfmaq_lane_f32(vo4p0, vi4x2345, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4()
479 vo3p0 = vfmaq_lane_f32(vo3p0, vi4x2345, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4()
484 vo2p0 = vfmaq_lane_f32(vo2p0, vi4x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4()
[all …]
D5x5p2-minmax-neon-5x4.c204 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4() local
219 vo4p0 = vmlaq_lane_f32(vo4p0, vi4x2345, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4()
224 vo3p0 = vmlaq_lane_f32(vo3p0, vi4x2345, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4()
229 vo2p0 = vmlaq_lane_f32(vo2p0, vi4x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4()
234 vo1p0 = vmlaq_lane_f32(vo1p0, vi4x2345, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4()
239 vo0p0 = vmlaq_lane_f32(vo0p0, vi4x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4()
459 const float32x4_t vi4x2345 = vextq_f32(vi4x0123, vi4x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4() local
474 vo4p0 = vmlaq_lane_f32(vo4p0, vi4x2345, vget_low_f32(vw0123), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4()
479 vo3p0 = vmlaq_lane_f32(vo3p0, vi4x2345, vget_high_f32(vw4567), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4()
484 vo2p0 = vmlaq_lane_f32(vo2p0, vi4x2345, vget_high_f32(vw89AB), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4()
[all …]
D5x5p2-minmax-wasmsimd-arm-loadsplat-5x4.c233 const v128_t vi4x2345 = wasm_v32x4_shuffle(vi4x0123, vi4x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() local
248 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi4x2345, vk00)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
253 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi4x2345, vk10)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
258 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x2345, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
263 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
268 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
487 const v128_t vi4x2345 = wasm_v32x4_shuffle(vi4x0123, vi4x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() local
502 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi4x2345, vk00)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
507 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi4x2345, vk10)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
512 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x2345, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
[all …]
D5x5p2-minmax-wasmsimd-x86-loadsplat-5x4.c233 const v128_t vi4x2345 = wasm_v32x4_shuffle(vi4x0123, vi4x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() local
248 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi4x2345, vk00)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
253 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi4x2345, vk10)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
258 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x2345, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
263 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x2345, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
268 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi4x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
487 const v128_t vi4x2345 = wasm_v32x4_shuffle(vi4x0123, vi4x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() local
502 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi4x2345, vk00)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
507 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi4x2345, vk10)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
512 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x2345, vk20)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
[all …]

1234