Searched refs:vb4567 (Results 1 – 25 of 313) sorted by relevance

/external/XNNPACK/src/f32-ppmm/gen/
8x8-minmax-neonfma.c
89 const float32x4_t vb4567 = vld1q_f32(w); w += 4; in xnn_f32_ppmm_minmax_ukernel_8x8__neonfma() local
100 vacc0x4567 = vfmaq_laneq_f32(vacc0x4567, vb4567, va0123, 0); in xnn_f32_ppmm_minmax_ukernel_8x8__neonfma()
101 vacc1x4567 = vfmaq_laneq_f32(vacc1x4567, vb4567, va0123, 1); in xnn_f32_ppmm_minmax_ukernel_8x8__neonfma()
102 vacc2x4567 = vfmaq_laneq_f32(vacc2x4567, vb4567, va0123, 2); in xnn_f32_ppmm_minmax_ukernel_8x8__neonfma()
103 vacc3x4567 = vfmaq_laneq_f32(vacc3x4567, vb4567, va0123, 3); in xnn_f32_ppmm_minmax_ukernel_8x8__neonfma()
104 vacc4x4567 = vfmaq_laneq_f32(vacc4x4567, vb4567, va4567, 0); in xnn_f32_ppmm_minmax_ukernel_8x8__neonfma()
105 vacc5x4567 = vfmaq_laneq_f32(vacc5x4567, vb4567, va4567, 1); in xnn_f32_ppmm_minmax_ukernel_8x8__neonfma()
106 vacc6x4567 = vfmaq_laneq_f32(vacc6x4567, vb4567, va4567, 2); in xnn_f32_ppmm_minmax_ukernel_8x8__neonfma()
107 vacc7x4567 = vfmaq_laneq_f32(vacc7x4567, vb4567, va4567, 3); in xnn_f32_ppmm_minmax_ukernel_8x8__neonfma()
126 vacc0x4567 = vfmaq_f32(vacc0x4567, va0000, vb4567); in xnn_f32_ppmm_minmax_ukernel_8x8__neonfma()
[all …]
4x8-minmax-neonfma.c
64 const float32x4_t vb4567 = vld1q_f32(w); w += 4; in xnn_f32_ppmm_minmax_ukernel_4x8__neonfma() local
71 vacc0x4567 = vfmaq_laneq_f32(vacc0x4567, vb4567, va0123, 0); in xnn_f32_ppmm_minmax_ukernel_4x8__neonfma()
72 vacc1x4567 = vfmaq_laneq_f32(vacc1x4567, vb4567, va0123, 1); in xnn_f32_ppmm_minmax_ukernel_4x8__neonfma()
73 vacc2x4567 = vfmaq_laneq_f32(vacc2x4567, vb4567, va0123, 2); in xnn_f32_ppmm_minmax_ukernel_4x8__neonfma()
74 vacc3x4567 = vfmaq_laneq_f32(vacc3x4567, vb4567, va0123, 3); in xnn_f32_ppmm_minmax_ukernel_4x8__neonfma()
85 vacc0x4567 = vfmaq_f32(vacc0x4567, va0000, vb4567); in xnn_f32_ppmm_minmax_ukernel_4x8__neonfma()
86 vacc1x4567 = vfmaq_f32(vacc1x4567, va1111, vb4567); in xnn_f32_ppmm_minmax_ukernel_4x8__neonfma()
87 vacc2x4567 = vfmaq_f32(vacc2x4567, va2222, vb4567); in xnn_f32_ppmm_minmax_ukernel_4x8__neonfma()
88 vacc3x4567 = vfmaq_f32(vacc3x4567, va3333, vb4567); in xnn_f32_ppmm_minmax_ukernel_4x8__neonfma()
8x8-minmax-neon.c
89 const float32x4_t vb4567 = vld1q_f32(w); w += 4; in xnn_f32_ppmm_minmax_ukernel_8x8__neon() local
99 vacc0x4567 = vmlaq_lane_f32(vacc0x4567, vb4567, vget_low_f32(va0123), 0); in xnn_f32_ppmm_minmax_ukernel_8x8__neon()
100 vacc1x4567 = vmlaq_lane_f32(vacc1x4567, vb4567, vget_low_f32(va0123), 1); in xnn_f32_ppmm_minmax_ukernel_8x8__neon()
101 vacc2x4567 = vmlaq_lane_f32(vacc2x4567, vb4567, vget_high_f32(va0123), 0); in xnn_f32_ppmm_minmax_ukernel_8x8__neon()
102 vacc3x4567 = vmlaq_lane_f32(vacc3x4567, vb4567, vget_high_f32(va0123), 1); in xnn_f32_ppmm_minmax_ukernel_8x8__neon()
103 vacc4x4567 = vmlaq_lane_f32(vacc4x4567, vb4567, vget_low_f32(va4567), 0); in xnn_f32_ppmm_minmax_ukernel_8x8__neon()
104 vacc5x4567 = vmlaq_lane_f32(vacc5x4567, vb4567, vget_low_f32(va4567), 1); in xnn_f32_ppmm_minmax_ukernel_8x8__neon()
105 vacc6x4567 = vmlaq_lane_f32(vacc6x4567, vb4567, vget_high_f32(va4567), 0); in xnn_f32_ppmm_minmax_ukernel_8x8__neon()
106 vacc7x4567 = vmlaq_lane_f32(vacc7x4567, vb4567, vget_high_f32(va4567), 1); in xnn_f32_ppmm_minmax_ukernel_8x8__neon()
4x8-minmax-wasmsimd-arm-splat.c
67 const v128_t vb4567 = wasm_v128_load(w + 4); in xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat() local
79 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0000, vb4567)); in xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat()
80 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1111, vb4567)); in xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat()
81 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2222, vb4567)); in xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat()
82 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3333, vb4567)); in xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat()
4x8-minmax-sse.c
65 const __m128 vb4567 = _mm_load_ps(w + 4); in xnn_f32_ppmm_minmax_ukernel_4x8__sse() local
77 vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0000, vb4567)); in xnn_f32_ppmm_minmax_ukernel_4x8__sse()
78 vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1111, vb4567)); in xnn_f32_ppmm_minmax_ukernel_4x8__sse()
79 vacc2x4567 = _mm_add_ps(vacc2x4567, _mm_mul_ps(va2222, vb4567)); in xnn_f32_ppmm_minmax_ukernel_4x8__sse()
80 vacc3x4567 = _mm_add_ps(vacc3x4567, _mm_mul_ps(va3333, vb4567)); in xnn_f32_ppmm_minmax_ukernel_4x8__sse()
4x8-minmax-neon.c
64 const float32x4_t vb4567 = vld1q_f32(w); w += 4; in xnn_f32_ppmm_minmax_ukernel_4x8__neon() local
70 vacc0x4567 = vmlaq_lane_f32(vacc0x4567, vb4567, vget_low_f32(va0123), 0); in xnn_f32_ppmm_minmax_ukernel_4x8__neon()
71 vacc1x4567 = vmlaq_lane_f32(vacc1x4567, vb4567, vget_low_f32(va0123), 1); in xnn_f32_ppmm_minmax_ukernel_4x8__neon()
72 vacc2x4567 = vmlaq_lane_f32(vacc2x4567, vb4567, vget_high_f32(va0123), 0); in xnn_f32_ppmm_minmax_ukernel_4x8__neon()
73 vacc3x4567 = vmlaq_lane_f32(vacc3x4567, vb4567, vget_high_f32(va0123), 1); in xnn_f32_ppmm_minmax_ukernel_4x8__neon()
4x8-minmax-wasmsimd-x86-splat.c
65 const v128_t vb4567 = wasm_v128_load(w + 4); in xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat() local
77 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0000, vb4567)); in xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat()
78 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1111, vb4567)); in xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat()
79 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2222, vb4567)); in xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat()
80 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3333, vb4567)); in xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat()
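All of the f32-ppmm hits above share one inner-loop shape: load a 4-wide slice of packed weights into vb4567, then issue one multiply-accumulate per output row, broadcasting a different activation lane each time. A minimal sketch of that step, assuming AArch64 NEON with FMA (the helper name and simplified pointer handling are illustrative, not XNNPACK's API):

#include <arm_neon.h>

/* One k-step of a 4x8 PPMM tile, columns 4..7 only.
 * vfmaq_laneq_f32(acc, b, v, l) computes acc + b * v[l] per lane.
 * The real kernels also process columns 0..3 and clamp at the end. */
static void ppmm_4x8_neonfma_step(const float *w, const float *a,
                                  float32x4_t acc4567[4]) {
  const float32x4_t vb4567 = vld1q_f32(w + 4);  /* 4 packed weights     */
  const float32x4_t va0123 = vld1q_f32(a);      /* 4 packed activations */
  acc4567[0] = vfmaq_laneq_f32(acc4567[0], vb4567, va0123, 0);
  acc4567[1] = vfmaq_laneq_f32(acc4567[1], vb4567, va0123, 1);
  acc4567[2] = vfmaq_laneq_f32(acc4567[2], vb4567, va0123, 2);
  acc4567[3] = vfmaq_laneq_f32(acc4567[3], vb4567, va0123, 3);
}

The plain -neon variants cannot use vfmaq_laneq_f32 (it requires ARMv8 FMA), so they fall back to vmlaq_lane_f32 over vget_low_f32/vget_high_f32 halves, while the -sse and -wasmsimd splat variants spell the same step as an explicit multiply followed by an add.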
/external/XNNPACK/src/f32-gemm/gen-inc/
6x8inc-minmax-wasmsimd-arm-loadsplat.c
106 const v128_t vb4567 = wasm_v128_load(w + 4); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat() local
115 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
116 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
117 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
118 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
119 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
120 vacc5x4567 = wasm_f32x4_add(vacc5x4567, wasm_f32x4_mul(va5, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
5x8inc-minmax-sse-load1.c
94 const __m128 vb4567 = _mm_load_ps(w + 4); in xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1() local
102 vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0, vb4567)); in xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1()
103 vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1, vb4567)); in xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1()
104 vacc2x4567 = _mm_add_ps(vacc2x4567, _mm_mul_ps(va2, vb4567)); in xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1()
105 vacc3x4567 = _mm_add_ps(vacc3x4567, _mm_mul_ps(va3, vb4567)); in xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1()
106 vacc4x4567 = _mm_add_ps(vacc4x4567, _mm_mul_ps(va4, vb4567)); in xnn_f32_gemminc_minmax_ukernel_5x8__sse_load1()
5x8inc-minmax-wasmsimd-arm-loadsplat.c
96 const v128_t vb4567 = wasm_v128_load(w + 4); in xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat() local
104 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567)); in xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat()
105 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567)); in xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat()
106 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567)); in xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat()
107 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567)); in xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat()
108 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_gemminc_minmax_ukernel_5x8__wasmsimd_arm_loadsplat()
6x8inc-minmax-wasmsimd-x86-loadsplat.c
104 const v128_t vb4567 = wasm_v128_load(w + 4); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat() local
113 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
114 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
115 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
116 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
117 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
118 vacc5x4567 = wasm_f32x4_add(vacc5x4567, wasm_f32x4_mul(va5, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
4x8inc-minmax-sse-load1.c
84 const __m128 vb4567 = _mm_load_ps(w + 4); in xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1() local
91 vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0, vb4567)); in xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1()
92 vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1, vb4567)); in xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1()
93 vacc2x4567 = _mm_add_ps(vacc2x4567, _mm_mul_ps(va2, vb4567)); in xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1()
94 vacc3x4567 = _mm_add_ps(vacc3x4567, _mm_mul_ps(va3, vb4567)); in xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1()
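The gen-inc (gemminc) kernels appear to differ from the plain GEMM ones only in where the accumulators start, being seeded from a passed-in buffer rather than from the bias; the vb4567 inner step is the same. A hedged sketch of one k-step of the SSE "load1" flavor, columns 4..7 only (helper name and array-of-pointers layout are assumptions for illustration):

#include <xmmintrin.h>

/* SSE1 has no FMA, so multiply and add are separate instructions.
 * _mm_load_ps assumes the packed weights are 16-byte aligned, which
 * XNNPACK's weight packing is expected to provide. */
static void gemm_4x8_sse_load1_step(const float *const a[4], const float *w,
                                    __m128 acc4567[4]) {
  const __m128 vb4567 = _mm_load_ps(w + 4);  /* 4 packed weights */
  for (int r = 0; r < 4; r++) {
    const __m128 va = _mm_load1_ps(a[r]);    /* broadcast one activation */
    acc4567[r] = _mm_add_ps(acc4567[r], _mm_mul_ps(va, vb4567));
  }
}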
/external/XNNPACK/src/f32-gemm/gen/
6x8-minmax-wasmsimd-arm-loadsplat.c
104 const v128_t vb4567 = wasm_v128_load(w + 4); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat() local
113 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
114 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
115 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
116 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
117 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
118 vacc5x4567 = wasm_f32x4_add(vacc5x4567, wasm_f32x4_mul(va5, vb4567)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
5x8-minmax-sse-load1.c
92 const __m128 vb4567 = _mm_load_ps(w + 4); in xnn_f32_gemm_minmax_ukernel_5x8__sse_load1() local
100 vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0, vb4567)); in xnn_f32_gemm_minmax_ukernel_5x8__sse_load1()
101 vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1, vb4567)); in xnn_f32_gemm_minmax_ukernel_5x8__sse_load1()
102 vacc2x4567 = _mm_add_ps(vacc2x4567, _mm_mul_ps(va2, vb4567)); in xnn_f32_gemm_minmax_ukernel_5x8__sse_load1()
103 vacc3x4567 = _mm_add_ps(vacc3x4567, _mm_mul_ps(va3, vb4567)); in xnn_f32_gemm_minmax_ukernel_5x8__sse_load1()
104 vacc4x4567 = _mm_add_ps(vacc4x4567, _mm_mul_ps(va4, vb4567)); in xnn_f32_gemm_minmax_ukernel_5x8__sse_load1()
5x8-minmax-wasmsimd-arm-loadsplat.c
94 const v128_t vb4567 = wasm_v128_load(w + 4); in xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat() local
102 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567)); in xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat()
103 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567)); in xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat()
104 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567)); in xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat()
105 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567)); in xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat()
106 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat()
6x8-minmax-wasmsimd-x86-loadsplat.c
102 const v128_t vb4567 = wasm_v128_load(w + 4); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat() local
111 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
112 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
113 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
114 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
115 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
116 vacc5x4567 = wasm_f32x4_add(vacc5x4567, wasm_f32x4_mul(va5, vb4567)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
4x8-minmax-wasmsimd-arm-loadsplat.c
84 const v128_t vb4567 = wasm_v128_load(w + 4); in xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat() local
91 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567)); in xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat()
92 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567)); in xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat()
93 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567)); in xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat()
94 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567)); in xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat()
4x8-minmax-sse-load1.c
82 const __m128 vb4567 = _mm_load_ps(w + 4); in xnn_f32_gemm_minmax_ukernel_4x8__sse_load1() local
89 vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0, vb4567)); in xnn_f32_gemm_minmax_ukernel_4x8__sse_load1()
90 vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1, vb4567)); in xnn_f32_gemm_minmax_ukernel_4x8__sse_load1()
91 vacc2x4567 = _mm_add_ps(vacc2x4567, _mm_mul_ps(va2, vb4567)); in xnn_f32_gemm_minmax_ukernel_4x8__sse_load1()
92 vacc3x4567 = _mm_add_ps(vacc3x4567, _mm_mul_ps(va3, vb4567)); in xnn_f32_gemm_minmax_ukernel_4x8__sse_load1()
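The wasmsimd "loadsplat" hits follow the same pattern with WebAssembly SIMD intrinsics; judging by these results, the -arm and -x86 suffixes do not change this inner loop and differ only in how the final min/max clamp is lowered. A minimal sketch, assuming <wasm_simd128.h> and illustrative names:

#include <wasm_simd128.h>

/* One k-step of a 4x8 tile, columns 4..7 only: splat one activation
 * per output row, then multiply-add against the shared weight vector. */
static void gemm_4x8_wasmsimd_loadsplat_step(const float *const a[4],
                                             const float *w,
                                             v128_t acc4567[4]) {
  const v128_t vb4567 = wasm_v128_load(w + 4);  /* 4 packed weights */
  for (int r = 0; r < 4; r++) {
    const v128_t va = wasm_f32x4_splat(*a[r]);  /* splat one activation */
    acc4567[r] = wasm_f32x4_add(acc4567[r], wasm_f32x4_mul(va, vb4567));
  }
}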
/external/XNNPACK/src/f32-igemm/gen/
6x8-minmax-wasmsimd-arm-loadsplat.c
119 const v128_t vb4567 = wasm_v128_load(w + 4); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat() local
136 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567)); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
138 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567)); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
140 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567)); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
142 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567)); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
144 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
146 vacc5x4567 = wasm_f32x4_add(vacc5x4567, wasm_f32x4_mul(va5, vb4567)); in xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
5x8-minmax-wasmsimd-arm-loadsplat.c
108 const v128_t vb4567 = wasm_v128_load(w + 4); in xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat() local
123 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567)); in xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat()
125 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567)); in xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat()
127 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567)); in xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat()
129 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567)); in xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat()
131 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat()
5x8-minmax-sse-load1.c
106 const __m128 vb4567 = _mm_load_ps(w + 4); in xnn_f32_igemm_minmax_ukernel_5x8__sse_load1() local
121 vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0, vb4567)); in xnn_f32_igemm_minmax_ukernel_5x8__sse_load1()
123 vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1, vb4567)); in xnn_f32_igemm_minmax_ukernel_5x8__sse_load1()
125 vacc2x4567 = _mm_add_ps(vacc2x4567, _mm_mul_ps(va2, vb4567)); in xnn_f32_igemm_minmax_ukernel_5x8__sse_load1()
127 vacc3x4567 = _mm_add_ps(vacc3x4567, _mm_mul_ps(va3, vb4567)); in xnn_f32_igemm_minmax_ukernel_5x8__sse_load1()
129 vacc4x4567 = _mm_add_ps(vacc4x4567, _mm_mul_ps(va4, vb4567)); in xnn_f32_igemm_minmax_ukernel_5x8__sse_load1()
4x8-minmax-wasmsimd-arm-loadsplat.c
97 const v128_t vb4567 = wasm_v128_load(w + 4); in xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat() local
110 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567)); in xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat()
112 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567)); in xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat()
114 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567)); in xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat()
116 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567)); in xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat()
4x8-minmax-sse-load1.c
95 const __m128 vb4567 = _mm_load_ps(w + 4); in xnn_f32_igemm_minmax_ukernel_4x8__sse_load1() local
108 vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0, vb4567)); in xnn_f32_igemm_minmax_ukernel_4x8__sse_load1()
110 vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1, vb4567)); in xnn_f32_igemm_minmax_ukernel_4x8__sse_load1()
112 vacc2x4567 = _mm_add_ps(vacc2x4567, _mm_mul_ps(va2, vb4567)); in xnn_f32_igemm_minmax_ukernel_4x8__sse_load1()
114 vacc3x4567 = _mm_add_ps(vacc3x4567, _mm_mul_ps(va3, vb4567)); in xnn_f32_igemm_minmax_ukernel_4x8__sse_load1()
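The f32-igemm hits match the f32-gemm ones line for line because IGEMM (indirect GEMM, used for convolution) changes only where the activation rows come from: each row pointer is read from an indirection buffer before the k-loop. A hedged sketch of that setup step, with illustrative names rather than the XNNPACK API:

#include <stddef.h>

/* Fetch one activation-row pointer per output row from the indirection
 * buffer; the inner k-loop then runs unchanged from the GEMM case. */
static void igemm_load_row_pointers(const float *const *indirection,
                                    size_t mr, const float *a[]) {
  for (size_t r = 0; r < mr; r++) {
    a[r] = indirection[r];
  }
}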
/external/XNNPACK/src/f32-vbinary/gen/
vmin-wasmsimd-x86-x8.c
38 const v128_t vb4567 = wasm_v128_load(b + 4); in xnn_f32_vmin_ukernel__wasmsimd_x86_x8() local
42 const v128_t vm4567 = wasm_f32x4_lt(va4567, vb4567); in xnn_f32_vmin_ukernel__wasmsimd_x86_x8()
45 v128_t vy4567 = wasm_v128_bitselect(va4567, vb4567, vm4567); in xnn_f32_vmin_ukernel__wasmsimd_x86_x8()
vmax-wasmsimd-x86-x8.c
38 const v128_t vb4567 = wasm_v128_load(b + 4); in xnn_f32_vmax_ukernel__wasmsimd_x86_x8() local
42 const v128_t vm4567 = wasm_f32x4_le(va4567, vb4567); in xnn_f32_vmax_ukernel__wasmsimd_x86_x8()
45 v128_t vy4567 = wasm_v128_bitselect(vb4567, va4567, vm4567); in xnn_f32_vmax_ukernel__wasmsimd_x86_x8()
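The f32-vbinary hits are unrelated to the GEMM kernels; here vb4567 is simply the second input operand, elements 4..7. The x86-tuned vmin/vmax kernels use an explicit compare plus bitselect, presumably because WebAssembly's NaN-propagating f32x4.min/max lower to slower instruction sequences on x86. A minimal sketch of that idiom:

#include <wasm_simd128.h>

/* Lanewise min/max via compare + bitselect, matching the kernels above.
 * wasm_v128_bitselect(a, b, m) takes bits from a where m is set, else b. */
static v128_t vmin_x86(v128_t va, v128_t vb) {
  const v128_t vm = wasm_f32x4_lt(va, vb);  /* a < b, per lane */
  return wasm_v128_bitselect(va, vb, vm);   /* pick a where a < b */
}

static v128_t vmax_x86(v128_t va, v128_t vb) {
  const v128_t vm = wasm_f32x4_le(va, vb);  /* a <= b, per lane */
  return wasm_v128_bitselect(vb, va, vm);   /* pick b where a <= b */
}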
