Home
last modified time | relevance | path

Searched refs:vi7x3456 (Results 1 – 25 of 29) sorted by relevance

12

/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D3x3p1-minmax-neon-6x4.c149 const float32x4_t vi7x3456 = vextq_f32(vi7x0123, vi7x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_6x4() local
170 vo5p0 = vmlaq_lane_f32(vo5p0, vi7x3456, vget_high_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_6x4()
290 const float32x4_t vi7x3456 = vextq_f32(vi7x0123, vi7x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_6x4() local
311 vo5p0 = vmlaq_lane_f32(vo5p0, vi7x3456, vget_high_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_6x4()
D3x3p1-minmax-ssse3-6x4.c163 …const __m128 vi7x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi7x4567), _mm_castps_si… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() local
182 vo5p0 = _mm_add_ps(vo5p0, _mm_mul_ps(vi7x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
297 …const __m128 vi7x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi7x4567), _mm_castps_si… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() local
316 vo5p0 = _mm_add_ps(vo5p0, _mm_mul_ps(vi7x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
D3x3p1-minmax-neonfma-6x4.c149 const float32x4_t vi7x3456 = vextq_f32(vi7x0123, vi7x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_6x4() local
170 vo5p0 = vfmaq_lane_f32(vo5p0, vi7x3456, vget_high_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_6x4()
290 const float32x4_t vi7x3456 = vextq_f32(vi7x0123, vi7x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_6x4() local
311 vo5p0 = vfmaq_lane_f32(vo5p0, vi7x3456, vget_high_f32(vw4567), 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_6x4()
D3x3p1-minmax-wasmsimd-x86-loadsplat-6x4.c170 const v128_t vi7x3456 = wasm_v32x4_shuffle(vi7x0123, vi7x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() local
189 vo5p0 = wasm_f32x4_add(vo5p0, wasm_f32x4_mul(vi7x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
303 const v128_t vi7x3456 = wasm_v32x4_shuffle(vi7x0123, vi7x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() local
322 vo5p0 = wasm_f32x4_add(vo5p0, wasm_f32x4_mul(vi7x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
D3x3p1-minmax-wasmsimd-arm-loadsplat-6x4.c170 const v128_t vi7x3456 = wasm_v32x4_shuffle(vi7x0123, vi7x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4() local
189 vo5p0 = wasm_f32x4_add(vo5p0, wasm_f32x4_mul(vi7x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
303 const v128_t vi7x3456 = wasm_v32x4_shuffle(vi7x0123, vi7x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4() local
322 vo5p0 = wasm_f32x4_add(vo5p0, wasm_f32x4_mul(vi7x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
D5x5p2-minmax-neonfma-5x4.c163 const float32x4_t vi7x3456 = vextq_f32(vi7x0123, vi7x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4() local
188 vo4p0 = vfmaq_lane_f32(vo4p0, vi7x3456, vget_low_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4()
193 vo3p0 = vfmaq_lane_f32(vo3p0, vi7x3456, vget_high_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4()
418 const float32x4_t vi7x3456 = vextq_f32(vi7x0123, vi7x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4() local
443 vo4p0 = vfmaq_lane_f32(vo4p0, vi7x3456, vget_low_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4()
448 vo3p0 = vfmaq_lane_f32(vo3p0, vi7x3456, vget_high_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4()
666 const float32x4_t vi7x3456 = vextq_f32(vi7x0123, vi7x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4() local
691 vo4p0 = vfmaq_lane_f32(vo4p0, vi7x3456, vget_low_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4()
696 vo3p0 = vfmaq_lane_f32(vo3p0, vi7x3456, vget_high_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4()
D5x5p2-minmax-neon-5x4.c163 const float32x4_t vi7x3456 = vextq_f32(vi7x0123, vi7x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4() local
188 vo4p0 = vmlaq_lane_f32(vo4p0, vi7x3456, vget_low_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4()
193 vo3p0 = vmlaq_lane_f32(vo3p0, vi7x3456, vget_high_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4()
418 const float32x4_t vi7x3456 = vextq_f32(vi7x0123, vi7x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4() local
443 vo4p0 = vmlaq_lane_f32(vo4p0, vi7x3456, vget_low_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4()
448 vo3p0 = vmlaq_lane_f32(vo3p0, vi7x3456, vget_high_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4()
666 const float32x4_t vi7x3456 = vextq_f32(vi7x0123, vi7x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4() local
691 vo4p0 = vmlaq_lane_f32(vo4p0, vi7x3456, vget_low_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4()
696 vo3p0 = vmlaq_lane_f32(vo3p0, vi7x3456, vget_high_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4()
D5x5p2-minmax-wasmsimd-arm-loadsplat-5x4.c192 const v128_t vi7x3456 = wasm_v32x4_shuffle(vi7x0123, vi7x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() local
217 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi7x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
222 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
446 const v128_t vi7x3456 = wasm_v32x4_shuffle(vi7x0123, vi7x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() local
471 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi7x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
476 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
693 const v128_t vi7x3456 = wasm_v32x4_shuffle(vi7x0123, vi7x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() local
718 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi7x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
723 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
D5x5p2-minmax-wasmsimd-x86-loadsplat-5x4.c192 const v128_t vi7x3456 = wasm_v32x4_shuffle(vi7x0123, vi7x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() local
217 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi7x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
222 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
446 const v128_t vi7x3456 = wasm_v32x4_shuffle(vi7x0123, vi7x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() local
471 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi7x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
476 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
693 const v128_t vi7x3456 = wasm_v32x4_shuffle(vi7x0123, vi7x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() local
718 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi7x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
723 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
D3x3p1-minmax-sse-6x4.c204 const __m128 vi7x3456 = _mm_move_ss(vi7x7456, vi7x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() local
223 vo5p0 = _mm_add_ps(vo5p0, _mm_mul_ps(vi7x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4()
388 const __m128 vi7x3456 = _mm_move_ss(vi7x7456, vi7x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() local
407 vo5p0 = _mm_add_ps(vo5p0, _mm_mul_ps(vi7x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4()
D5x5p2-minmax-sse-5x4.c200 const __m128 vi7x3456 = _mm_move_ss(vi7x7456, vi7x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local
222 vo4p0 = _mm_add_ps(vo4p0, _mm_mul_ps(vi7x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
226 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
453 const __m128 vi7x3456 = _mm_move_ss(vi7x7456, vi7x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local
475 vo4p0 = _mm_add_ps(vo4p0, _mm_mul_ps(vi7x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
479 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
700 const __m128 vi7x3456 = _mm_move_ss(vi7x7456, vi7x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local
722 vo4p0 = _mm_add_ps(vo4p0, _mm_mul_ps(vi7x3456, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
726 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
D3x3p1-minmax-wasmsimd-arm-splat-6x4.c152 const v128_t vi7x3456 = wasm_v32x4_shuffle(vi7x0123, vi7x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_6x4() local
173 …vo5p0 = wasm_f32x4_add(vo5p0, wasm_f32x4_mul(vi7x3456, wasm_v32x4_shuffle(vw4567, vw4567, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_6x4()
292 const v128_t vi7x3456 = wasm_v32x4_shuffle(vi7x0123, vi7x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_6x4() local
313 …vo5p0 = wasm_f32x4_add(vo5p0, wasm_f32x4_mul(vi7x3456, wasm_v32x4_shuffle(vw4567, vw4567, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_6x4()
D3x3p1-minmax-wasmsimd-x86-splat-6x4.c152 const v128_t vi7x3456 = wasm_v32x4_shuffle(vi7x0123, vi7x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_6x4() local
173 …vo5p0 = wasm_f32x4_add(vo5p0, wasm_f32x4_mul(vi7x3456, wasm_v32x4_shuffle(vw4567, vw4567, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_6x4()
292 const v128_t vi7x3456 = wasm_v32x4_shuffle(vi7x0123, vi7x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_6x4() local
313 …vo5p0 = wasm_f32x4_add(vo5p0, wasm_f32x4_mul(vi7x3456, wasm_v32x4_shuffle(vw4567, vw4567, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_6x4()
D5x5p2-minmax-wasmsimd-arm-splat-5x4.c166 const v128_t vi7x3456 = wasm_v32x4_shuffle(vi7x0123, vi7x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4() local
191 …vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi7x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4()
196 …vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x3456, wasm_v32x4_shuffle(vwKLMN, vwKLMN, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4()
420 const v128_t vi7x3456 = wasm_v32x4_shuffle(vi7x0123, vi7x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4() local
445 …vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi7x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4()
450 …vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x3456, wasm_v32x4_shuffle(vwKLMN, vwKLMN, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4()
667 const v128_t vi7x3456 = wasm_v32x4_shuffle(vi7x0123, vi7x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4() local
692 …vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi7x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4()
697 …vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x3456, wasm_v32x4_shuffle(vwKLMN, vwKLMN, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4()
D5x5p2-minmax-wasmsimd-x86-splat-5x4.c166 const v128_t vi7x3456 = wasm_v32x4_shuffle(vi7x0123, vi7x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4() local
191 …vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi7x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4()
196 …vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x3456, wasm_v32x4_shuffle(vwKLMN, vwKLMN, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4()
420 const v128_t vi7x3456 = wasm_v32x4_shuffle(vi7x0123, vi7x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4() local
445 …vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi7x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4()
450 …vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x3456, wasm_v32x4_shuffle(vwKLMN, vwKLMN, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4()
667 const v128_t vi7x3456 = wasm_v32x4_shuffle(vi7x0123, vi7x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4() local
692 …vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi7x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4()
697 …vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x3456, wasm_v32x4_shuffle(vwKLMN, vwKLMN, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4()
D5x5p2-minmax-neon-4x4-acc2.c148 const float32x4_t vi7x3456 = vextq_f32(vi7x0123, vi7x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2() local
173 vo3p1 = vmlaq_lane_f32(vo3p1, vi7x3456, vget_high_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
370 const float32x4_t vi7x3456 = vextq_f32(vi7x0123, vi7x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2() local
395 vo3p1 = vmlaq_lane_f32(vo3p1, vi7x3456, vget_high_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
586 const float32x4_t vi7x3456 = vextq_f32(vi7x0123, vi7x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2() local
611 vo3p1 = vmlaq_lane_f32(vo3p1, vi7x3456, vget_high_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
D5x5p2-minmax-neon-4x4.c148 const float32x4_t vi7x3456 = vextq_f32(vi7x0123, vi7x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4() local
173 vo3p0 = vmlaq_lane_f32(vo3p0, vi7x3456, vget_high_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
366 const float32x4_t vi7x3456 = vextq_f32(vi7x0123, vi7x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4() local
391 vo3p0 = vmlaq_lane_f32(vo3p0, vi7x3456, vget_high_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
578 const float32x4_t vi7x3456 = vextq_f32(vi7x0123, vi7x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4() local
603 vo3p0 = vmlaq_lane_f32(vo3p0, vi7x3456, vget_high_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
D5x5p2-minmax-neonfma-4x4.c148 const float32x4_t vi7x3456 = vextq_f32(vi7x0123, vi7x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4() local
173 vo3p0 = vfmaq_lane_f32(vo3p0, vi7x3456, vget_high_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
366 const float32x4_t vi7x3456 = vextq_f32(vi7x0123, vi7x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4() local
391 vo3p0 = vfmaq_lane_f32(vo3p0, vi7x3456, vget_high_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
578 const float32x4_t vi7x3456 = vextq_f32(vi7x0123, vi7x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4() local
603 vo3p0 = vfmaq_lane_f32(vo3p0, vi7x3456, vget_high_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
D5x5p2-minmax-neonfma-4x4-acc2.c148 const float32x4_t vi7x3456 = vextq_f32(vi7x0123, vi7x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2() local
173 vo3p1 = vfmaq_lane_f32(vo3p1, vi7x3456, vget_high_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()
370 const float32x4_t vi7x3456 = vextq_f32(vi7x0123, vi7x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2() local
395 vo3p1 = vfmaq_lane_f32(vo3p1, vi7x3456, vget_high_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()
586 const float32x4_t vi7x3456 = vextq_f32(vi7x0123, vi7x4567, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2() local
611 vo3p1 = vfmaq_lane_f32(vo3p1, vi7x3456, vget_high_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()
D5x5p2-minmax-wasmsimd-arm-loadsplat-4x4-acc2.c177 const v128_t vi7x3456 = wasm_v32x4_shuffle(vi7x0123, vi7x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2() local
202 vo3p1 = wasm_f32x4_add(vo3p1, wasm_f32x4_mul(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
398 const v128_t vi7x3456 = wasm_v32x4_shuffle(vi7x0123, vi7x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2() local
423 vo3p1 = wasm_f32x4_add(vo3p1, wasm_f32x4_mul(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
613 const v128_t vi7x3456 = wasm_v32x4_shuffle(vi7x0123, vi7x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2() local
638 vo3p1 = wasm_f32x4_add(vo3p1, wasm_f32x4_mul(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
D5x5p2-minmax-wasmsimd-x86-loadsplat-4x4.c177 const v128_t vi7x3456 = wasm_v32x4_shuffle(vi7x0123, vi7x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4() local
202 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4()
394 const v128_t vi7x3456 = wasm_v32x4_shuffle(vi7x0123, vi7x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4() local
419 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4()
605 const v128_t vi7x3456 = wasm_v32x4_shuffle(vi7x0123, vi7x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4() local
630 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4()
D5x5p2-minmax-wasmsimd-x86-loadsplat-4x4-acc2.c177 const v128_t vi7x3456 = wasm_v32x4_shuffle(vi7x0123, vi7x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2() local
202 vo3p1 = wasm_f32x4_add(vo3p1, wasm_f32x4_mul(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
398 const v128_t vi7x3456 = wasm_v32x4_shuffle(vi7x0123, vi7x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2() local
423 vo3p1 = wasm_f32x4_add(vo3p1, wasm_f32x4_mul(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
613 const v128_t vi7x3456 = wasm_v32x4_shuffle(vi7x0123, vi7x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2() local
638 vo3p1 = wasm_f32x4_add(vo3p1, wasm_f32x4_mul(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
D5x5p2-minmax-wasmsimd-arm-loadsplat-4x4.c177 const v128_t vi7x3456 = wasm_v32x4_shuffle(vi7x0123, vi7x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4() local
202 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4()
394 const v128_t vi7x3456 = wasm_v32x4_shuffle(vi7x0123, vi7x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4() local
419 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4()
605 const v128_t vi7x3456 = wasm_v32x4_shuffle(vi7x0123, vi7x4567, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4() local
630 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4()
D5x5p2-minmax-sse-4x4.c183 const __m128 vi7x3456 = _mm_move_ss(vi7x7456, vi7x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() local
204 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()
397 const __m128 vi7x3456 = _mm_move_ss(vi7x7456, vi7x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() local
418 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()
606 const __m128 vi7x3456 = _mm_move_ss(vi7x7456, vi7x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() local
627 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()
D5x5p2-minmax-sse-4x4-acc2.c183 const __m128 vi7x3456 = _mm_move_ss(vi7x7456, vi7x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() local
204 vo3p1 = _mm_add_ps(vo3p1, _mm_mul_ps(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()
401 const __m128 vi7x3456 = _mm_move_ss(vi7x7456, vi7x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() local
422 vo3p1 = _mm_add_ps(vo3p1, _mm_mul_ps(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()
614 const __m128 vi7x3456 = _mm_move_ss(vi7x7456, vi7x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() local
635 vo3p1 = _mm_add_ps(vo3p1, _mm_mul_ps(vi7x3456, vk41)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()

12