
Searched refs:vacc1x0123 (Results 1 – 25 of 484) sorted by relevance


/external/XNNPACK/src/f32-vmulcaddc/gen/
c8-minmax-sse-2x.c
56 __m128 vacc1x0123 = _mm_loadu_ps(i1); in xnn_f32_vmulcaddc_minmax_ukernel_c8__sse_2x() local
62 vacc1x0123 = _mm_mul_ps(vacc1x0123, vscale0123); in xnn_f32_vmulcaddc_minmax_ukernel_c8__sse_2x()
70 vacc1x0123 = _mm_add_ps(vacc1x0123, vbias0123); in xnn_f32_vmulcaddc_minmax_ukernel_c8__sse_2x()
75 vacc1x0123 = _mm_max_ps(vacc1x0123, vmin); in xnn_f32_vmulcaddc_minmax_ukernel_c8__sse_2x()
80 vacc1x0123 = _mm_min_ps(vacc1x0123, vmax); in xnn_f32_vmulcaddc_minmax_ukernel_c8__sse_2x()
86 _mm_storeu_ps(o1, vacc1x0123); in xnn_f32_vmulcaddc_minmax_ukernel_c8__sse_2x()
97 __m128 vacc1x0123 = _mm_loadu_ps(i1); in xnn_f32_vmulcaddc_minmax_ukernel_c8__sse_2x() local
101 vacc1x0123 = _mm_mul_ps(vacc1x0123, vscale0123); in xnn_f32_vmulcaddc_minmax_ukernel_c8__sse_2x()
106 vacc1x0123 = _mm_add_ps(vacc1x0123, vbias0123); in xnn_f32_vmulcaddc_minmax_ukernel_c8__sse_2x()
109 vacc1x0123 = _mm_max_ps(vacc1x0123, vmin); in xnn_f32_vmulcaddc_minmax_ukernel_c8__sse_2x()
[all …]
c8-minmax-neon-2x.c
55 float32x4_t vacc1x0123 = vld1q_f32(i1); i1 += 4; in xnn_f32_vmulcaddc_minmax_ukernel_c8__neon_2x() local
60 vacc1x0123 = vmulq_f32(vacc1x0123, vscale0123); in xnn_f32_vmulcaddc_minmax_ukernel_c8__neon_2x()
68 vacc1x0123 = vaddq_f32(vacc1x0123, vbias0123); in xnn_f32_vmulcaddc_minmax_ukernel_c8__neon_2x()
73 vacc1x0123 = vmaxq_f32(vacc1x0123, vmin); in xnn_f32_vmulcaddc_minmax_ukernel_c8__neon_2x()
78 vacc1x0123 = vminq_f32(vacc1x0123, vmax); in xnn_f32_vmulcaddc_minmax_ukernel_c8__neon_2x()
83 vst1q_f32(o1, vacc1x0123); o1 += 4; in xnn_f32_vmulcaddc_minmax_ukernel_c8__neon_2x()
90 float32x4_t vacc1x0123 = vld1q_f32(i1); i1 += 4; in xnn_f32_vmulcaddc_minmax_ukernel_c8__neon_2x() local
93 vacc1x0123 = vmulq_f32(vacc1x0123, vscale0123); in xnn_f32_vmulcaddc_minmax_ukernel_c8__neon_2x()
98 vacc1x0123 = vaddq_f32(vacc1x0123, vbias0123); in xnn_f32_vmulcaddc_minmax_ukernel_c8__neon_2x()
101 vacc1x0123 = vmaxq_f32(vacc1x0123, vmin); in xnn_f32_vmulcaddc_minmax_ukernel_c8__neon_2x()
[all …]
c4-minmax-sse-2x.c
54 __m128 vacc1x0123 = _mm_loadu_ps(i1); in xnn_f32_vmulcaddc_minmax_ukernel_c4__sse_2x() local
58 vacc1x0123 = _mm_mul_ps(vacc1x0123, vscale0123); in xnn_f32_vmulcaddc_minmax_ukernel_c4__sse_2x()
63 vacc1x0123 = _mm_add_ps(vacc1x0123, vbias0123); in xnn_f32_vmulcaddc_minmax_ukernel_c4__sse_2x()
66 vacc1x0123 = _mm_max_ps(vacc1x0123, vmin); in xnn_f32_vmulcaddc_minmax_ukernel_c4__sse_2x()
69 vacc1x0123 = _mm_min_ps(vacc1x0123, vmax); in xnn_f32_vmulcaddc_minmax_ukernel_c4__sse_2x()
73 _mm_storeu_ps(o1, vacc1x0123); in xnn_f32_vmulcaddc_minmax_ukernel_c4__sse_2x()
83 __m128 vacc1x0123 = _mm_loadu_ps(i1); in xnn_f32_vmulcaddc_minmax_ukernel_c4__sse_2x() local
87 vacc1x0123 = _mm_mul_ps(vacc1x0123, vscale0123); in xnn_f32_vmulcaddc_minmax_ukernel_c4__sse_2x()
92 vacc1x0123 = _mm_add_ps(vacc1x0123, vbias0123); in xnn_f32_vmulcaddc_minmax_ukernel_c4__sse_2x()
95 vacc1x0123 = _mm_max_ps(vacc1x0123, vmin); in xnn_f32_vmulcaddc_minmax_ukernel_c4__sse_2x()
[all …]
c8-minmax-neonfma-2x.c
55 float32x4_t vacc1x0123 = vld1q_f32(i1); i1 += 4; in xnn_f32_vmulcaddc_minmax_ukernel_c8__neonfma_2x() local
64 vacc1x0123 = vfmaq_f32(vbias0123, vscale0123, vacc1x0123); in xnn_f32_vmulcaddc_minmax_ukernel_c8__neonfma_2x()
69 vacc1x0123 = vmaxq_f32(vacc1x0123, vmin); in xnn_f32_vmulcaddc_minmax_ukernel_c8__neonfma_2x()
74 vacc1x0123 = vminq_f32(vacc1x0123, vmax); in xnn_f32_vmulcaddc_minmax_ukernel_c8__neonfma_2x()
79 vst1q_f32(o1, vacc1x0123); o1 += 4; in xnn_f32_vmulcaddc_minmax_ukernel_c8__neonfma_2x()
86 float32x4_t vacc1x0123 = vld1q_f32(i1); i1 += 4; in xnn_f32_vmulcaddc_minmax_ukernel_c8__neonfma_2x() local
92 vacc1x0123 = vfmaq_f32(vbias0123, vscale0123, vacc1x0123); in xnn_f32_vmulcaddc_minmax_ukernel_c8__neonfma_2x()
95 vacc1x0123 = vmaxq_f32(vacc1x0123, vmin); in xnn_f32_vmulcaddc_minmax_ukernel_c8__neonfma_2x()
98 vacc1x0123 = vminq_f32(vacc1x0123, vmax); in xnn_f32_vmulcaddc_minmax_ukernel_c8__neonfma_2x()
101 vst1q_f32(o1, vacc1x0123); o1 += 4; in xnn_f32_vmulcaddc_minmax_ukernel_c8__neonfma_2x()
[all …]
c4-minmax-neon-2x.c
53 float32x4_t vacc1x0123 = vld1q_f32(i1); i1 += 4; in xnn_f32_vmulcaddc_minmax_ukernel_c4__neon_2x() local
56 vacc1x0123 = vmulq_f32(vacc1x0123, vscale0123); in xnn_f32_vmulcaddc_minmax_ukernel_c4__neon_2x()
61 vacc1x0123 = vaddq_f32(vacc1x0123, vbias0123); in xnn_f32_vmulcaddc_minmax_ukernel_c4__neon_2x()
64 vacc1x0123 = vmaxq_f32(vacc1x0123, vmin); in xnn_f32_vmulcaddc_minmax_ukernel_c4__neon_2x()
67 vacc1x0123 = vminq_f32(vacc1x0123, vmax); in xnn_f32_vmulcaddc_minmax_ukernel_c4__neon_2x()
70 vst1q_f32(o1, vacc1x0123); o1 += 4; in xnn_f32_vmulcaddc_minmax_ukernel_c4__neon_2x()
76 float32x4_t vacc1x0123 = vld1q_f32(i1); i1 = (const float*) ((uintptr_t) i1 + c); in xnn_f32_vmulcaddc_minmax_ukernel_c4__neon_2x() local
79 vacc1x0123 = vmulq_f32(vacc1x0123, vscale0123); in xnn_f32_vmulcaddc_minmax_ukernel_c4__neon_2x()
84 vacc1x0123 = vaddq_f32(vacc1x0123, vbias0123); in xnn_f32_vmulcaddc_minmax_ukernel_c4__neon_2x()
87 vacc1x0123 = vmaxq_f32(vacc1x0123, vmin); in xnn_f32_vmulcaddc_minmax_ukernel_c4__neon_2x()
[all …]
c4-minmax-neonfma-2x.c
53 float32x4_t vacc1x0123 = vld1q_f32(i1); i1 += 4; in xnn_f32_vmulcaddc_minmax_ukernel_c4__neonfma_2x() local
59 vacc1x0123 = vfmaq_f32(vbias0123, vscale0123, vacc1x0123); in xnn_f32_vmulcaddc_minmax_ukernel_c4__neonfma_2x()
62 vacc1x0123 = vmaxq_f32(vacc1x0123, vmin); in xnn_f32_vmulcaddc_minmax_ukernel_c4__neonfma_2x()
65 vacc1x0123 = vminq_f32(vacc1x0123, vmax); in xnn_f32_vmulcaddc_minmax_ukernel_c4__neonfma_2x()
68 vst1q_f32(o1, vacc1x0123); o1 += 4; in xnn_f32_vmulcaddc_minmax_ukernel_c4__neonfma_2x()
74 float32x4_t vacc1x0123 = vld1q_f32(i1); i1 = (const float*) ((uintptr_t) i1 + c); in xnn_f32_vmulcaddc_minmax_ukernel_c4__neonfma_2x() local
80 vacc1x0123 = vfmaq_f32(vbias0123, vscale0123, vacc1x0123); in xnn_f32_vmulcaddc_minmax_ukernel_c4__neonfma_2x()
83 vacc1x0123 = vmaxq_f32(vacc1x0123, vmin); in xnn_f32_vmulcaddc_minmax_ukernel_c4__neonfma_2x()
86 vacc1x0123 = vminq_f32(vacc1x0123, vmax); in xnn_f32_vmulcaddc_minmax_ukernel_c4__neonfma_2x()
89 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_vmulcaddc_minmax_ukernel_c4__neonfma_2x()
[all …]
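
All of the vmulcaddc hits above implement the same per-channel multiply-add with output clamping; only the ISA differs (SSE, NEON, NEON with FMA). A minimal scalar sketch of the operation these kernels vectorize, with illustrative names rather than the real XNNPACK signature:

#include <stddef.h>

/* Scalar reference for the vmulcaddc minmax kernels: out = clamp(in * scale
 * + bias, vmin, vmax), applied per channel; the "_2x" kernels process two
 * rows per pass. Function and parameter names are illustrative. */
static void vmulcaddc_minmax_ref(size_t rows, size_t channels,
                                 const float* input, const float* scale,
                                 const float* bias, float* output,
                                 float vmin, float vmax) {
  for (size_t m = 0; m < rows; m++) {
    for (size_t c = 0; c < channels; c++) {
      float vacc = input[m * channels + c];  /* _mm_loadu_ps / vld1q_f32 */
      vacc = vacc * scale[c] + bias[c];      /* mul+add, or a single vfmaq_f32 */
      vacc = vacc < vmin ? vmin : vacc;      /* _mm_max_ps / vmaxq_f32 */
      vacc = vacc > vmax ? vmax : vacc;      /* _mm_min_ps / vminq_f32 */
      output[m * channels + c] = vacc;
    }
  }
}
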
/external/XNNPACK/src/qs8-gemm/gen/
2x8-minmax-neon-mull-addw-dup.c
51 int32x4_t vacc1x0123 = vacc0x0123; in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup() local
65 vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup()
73 vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c1)); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup()
81 vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c2)); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup()
89 vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c3)); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup()
97 vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c4)); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup()
105 vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c5)); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup()
113 vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c6)); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup()
121 vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c7)); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup()
136 vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup()
[all …]
2x8-minmax-neon-mlal-lane.c
51 int32x4_t vacc1x0123 = vacc0x0123; in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane() local
66 vacc1x0123 = vmlal_lane_s16(vacc1x0123, vget_low_s16(vxb01234567c0), vget_low_s16(vxa1), 0); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane()
73 vacc1x0123 = vmlal_lane_s16(vacc1x0123, vget_low_s16(vxb01234567c1), vget_low_s16(vxa1), 1); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane()
80 vacc1x0123 = vmlal_lane_s16(vacc1x0123, vget_low_s16(vxb01234567c2), vget_low_s16(vxa1), 2); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane()
87 vacc1x0123 = vmlal_lane_s16(vacc1x0123, vget_low_s16(vxb01234567c3), vget_low_s16(vxa1), 3); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane()
95 vacc1x0123 = vmlal_lane_s16(vacc1x0123, vget_low_s16(vxb01234567c4), vget_high_s16(vxa1), 0); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane()
102 vacc1x0123 = vmlal_lane_s16(vacc1x0123, vget_low_s16(vxb01234567c5), vget_high_s16(vxa1), 1); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane()
109 vacc1x0123 = vmlal_lane_s16(vacc1x0123, vget_low_s16(vxb01234567c6), vget_high_s16(vxa1), 2); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane()
116 vacc1x0123 = vmlal_lane_s16(vacc1x0123, vget_low_s16(vxb01234567c7), vget_high_s16(vxa1), 3); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane()
132 vacc1x0123 = vmlal_lane_s16(vacc1x0123, vget_low_s16(vxb01234567c0), vget_low_s16(vxa1), 0); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane()
[all …]
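
In the qs8 GEMM hits above, vacc1x0123 is the int32 accumulator for row 1, columns 0-3 of the output tile; it starts as a copy of row 0's bias-initialized accumulator (vacc1x0123 = vacc0x0123). Both the mull-addw-dup and mlal-lane kernels then widen signed 8-bit products into it. A scalar sketch of that accumulation, with illustrative names:

#include <stddef.h>
#include <stdint.h>

/* Scalar reference for one accumulator lane: vmull_s8 widens s8 products to
 * s16 (each s8*s8 product fits in s16), and vaddw_s16, or the fused
 * vmlal_lane_s16, widen-adds them into s32. */
static int32_t qs8_dot_ref(const int8_t* a, const int8_t* b, size_t kc,
                           int32_t bias) {
  int32_t vacc = bias;
  for (size_t k = 0; k < kc; k++) {
    vacc += (int32_t) a[k] * (int32_t) b[k];
  }
  return vacc;
}
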
/external/XNNPACK/src/qs8-igemm/gen/
2x8-minmax-neon-mull-addw-dup.c
52 int32x4_t vacc1x0123 = vacc0x0123; in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup() local
78 vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup()
86 vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c1)); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup()
94 vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c2)); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup()
102 vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c3)); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup()
110 vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c4)); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup()
118 vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c5)); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup()
126 vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c6)); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup()
134 vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c7)); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup()
149 vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup()
[all …]
2x8-minmax-neon-mlal-lane.c
52 int32x4_t vacc1x0123 = vacc0x0123; in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane() local
79 vacc1x0123 = vmlal_lane_s16(vacc1x0123, vget_low_s16(vxb01234567c0), vget_low_s16(vxa1), 0); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane()
86 vacc1x0123 = vmlal_lane_s16(vacc1x0123, vget_low_s16(vxb01234567c1), vget_low_s16(vxa1), 1); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane()
93 vacc1x0123 = vmlal_lane_s16(vacc1x0123, vget_low_s16(vxb01234567c2), vget_low_s16(vxa1), 2); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane()
100 vacc1x0123 = vmlal_lane_s16(vacc1x0123, vget_low_s16(vxb01234567c3), vget_low_s16(vxa1), 3); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane()
108 vacc1x0123 = vmlal_lane_s16(vacc1x0123, vget_low_s16(vxb01234567c4), vget_high_s16(vxa1), 0); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane()
115 vacc1x0123 = vmlal_lane_s16(vacc1x0123, vget_low_s16(vxb01234567c5), vget_high_s16(vxa1), 1); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane()
122 vacc1x0123 = vmlal_lane_s16(vacc1x0123, vget_low_s16(vxb01234567c6), vget_high_s16(vxa1), 2); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane()
129 vacc1x0123 = vmlal_lane_s16(vacc1x0123, vget_low_s16(vxb01234567c7), vget_high_s16(vxa1), 3); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane()
145 vacc1x0123 = vmlal_lane_s16(vacc1x0123, vget_low_s16(vxb01234567c0), vget_low_s16(vxa1), 0); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane()
[all …]
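
The igemm hits compute the same widening dot product as the gemm kernels, but read each A row through an indirection buffer (one pointer per row and kernel tap), so convolution patches never need to be materialized. An illustrative sketch under those assumptions:

#include <stddef.h>
#include <stdint.h>

/* One accumulator lane of an indirect qs8 GEMM: sum over kernel taps, each
 * tap reading A through a pointer table. Names are illustrative. */
static int32_t qs8_igemm_dot_ref(const int8_t** indirection, size_t ks,
                                 size_t kc, const int8_t* w, int32_t bias) {
  int32_t vacc = bias;                   /* vacc1x0123 starts from the bias */
  for (size_t p = 0; p < ks; p++) {      /* kernel taps */
    const int8_t* a = indirection[p];    /* indirect row pointer */
    for (size_t k = 0; k < kc; k++) {
      vacc += (int32_t) a[k] * (int32_t) w[p * kc + k];
    }
  }
  return vacc;
}
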
/external/XNNPACK/src/f32-gemm/gen/
3x8s4-minmax-wasmsimd-x86.c
56 v128_t vacc1x0123 = vacc0x0123; in xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86() local
76 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c0)); in xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86()
90 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c1)); in xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86()
104 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c2)); in xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86()
118 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86()
142 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123)); in xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86()
154 vacc1x0123 = wasm_v128_bitselect(vmin, vacc1x0123, wasm_f32x4_lt(vacc1x0123, vmin)); in xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86()
162 vacc1x0123 = wasm_v128_bitselect(vacc1x0123, vmax, wasm_f32x4_le(vacc1x0123, vmax)); in xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86()
172 wasm_v128_store(c1, vacc1x0123); in xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86()
187 wasm_v128_store(c1, vacc1x0123); in xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86()
[all …]
3x8-minmax-wasmsimd-x86-splat.c
56 v128_t vacc1x0123 = vacc0x0123; in xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_splat() local
79 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1c0, vb0123c0)); in xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_splat()
92 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1c1, vb0123c1)); in xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_splat()
105 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1c2, vb0123c2)); in xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_splat()
118 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1c3, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_splat()
141 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123)); in xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_splat()
153 vacc1x0123 = wasm_v128_bitselect(vmin, vacc1x0123, wasm_f32x4_lt(vacc1x0123, vmin)); in xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_splat()
161 vacc1x0123 = wasm_v128_bitselect(vacc1x0123, vmax, wasm_f32x4_le(vacc1x0123, vmax)); in xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_splat()
171 wasm_v128_store(c1, vacc1x0123); in xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_splat()
186 wasm_v128_store(c1, vacc1x0123); in xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_splat()
[all …]
3x8-minmax-sse-dup.c
56 __m128 vacc1x0123 = vacc0x0123; in xnn_f32_gemm_minmax_ukernel_3x8__sse_dup() local
80 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c0000, vb0123c0)); in xnn_f32_gemm_minmax_ukernel_3x8__sse_dup()
94 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c1111, vb0123c1)); in xnn_f32_gemm_minmax_ukernel_3x8__sse_dup()
108 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c2222, vb0123c2)); in xnn_f32_gemm_minmax_ukernel_3x8__sse_dup()
122 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c3333, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_3x8__sse_dup()
145 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1, vb0123)); in xnn_f32_gemm_minmax_ukernel_3x8__sse_dup()
157 vacc1x0123 = _mm_min_ps(vacc1x0123, vmax); in xnn_f32_gemm_minmax_ukernel_3x8__sse_dup()
165 vacc1x0123 = _mm_max_ps(vacc1x0123, vmin); in xnn_f32_gemm_minmax_ukernel_3x8__sse_dup()
175 _mm_storeu_ps(c1, vacc1x0123); in xnn_f32_gemm_minmax_ukernel_3x8__sse_dup()
190 _mm_storeu_ps(c1, vacc1x0123); in xnn_f32_gemm_minmax_ukernel_3x8__sse_dup()
[all …]
3x8-minmax-sse2-dup.c
56 __m128 vacc1x0123 = vacc0x0123; in xnn_f32_gemm_minmax_ukernel_3x8__sse2_dup() local
80 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c0000, vb0123c0)); in xnn_f32_gemm_minmax_ukernel_3x8__sse2_dup()
94 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c1111, vb0123c1)); in xnn_f32_gemm_minmax_ukernel_3x8__sse2_dup()
108 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c2222, vb0123c2)); in xnn_f32_gemm_minmax_ukernel_3x8__sse2_dup()
122 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1c3333, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_3x8__sse2_dup()
145 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1, vb0123)); in xnn_f32_gemm_minmax_ukernel_3x8__sse2_dup()
157 vacc1x0123 = _mm_min_ps(vacc1x0123, vmax); in xnn_f32_gemm_minmax_ukernel_3x8__sse2_dup()
165 vacc1x0123 = _mm_max_ps(vacc1x0123, vmin); in xnn_f32_gemm_minmax_ukernel_3x8__sse2_dup()
175 _mm_storeu_ps(c1, vacc1x0123); in xnn_f32_gemm_minmax_ukernel_3x8__sse2_dup()
190 _mm_storeu_ps(c1, vacc1x0123); in xnn_f32_gemm_minmax_ukernel_3x8__sse2_dup()
[all …]
3x8s4-minmax-wasmsimd-arm.c
58 v128_t vacc1x0123 = vacc0x0123; in xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm() local
78 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c0)); in xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm()
92 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c1)); in xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm()
106 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c2)); in xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm()
120 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm()
144 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123)); in xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm()
155 vacc1x0123 = wasm_f32x4_max(vacc1x0123, vmin); in xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm()
162 vacc1x0123 = wasm_f32x4_min(vacc1x0123, vmax); in xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm()
172 wasm_v128_store(c1, vacc1x0123); in xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm()
187 wasm_v128_store(c1, vacc1x0123); in xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm()
[all …]
3x8s4-minmax-sse.c
56 __m128 vacc1x0123 = vacc0x0123; in xnn_f32_gemm_minmax_ukernel_3x8s4__sse() local
76 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1, vb0123c0)); in xnn_f32_gemm_minmax_ukernel_3x8s4__sse()
90 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1, vb0123c1)); in xnn_f32_gemm_minmax_ukernel_3x8s4__sse()
104 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1, vb0123c2)); in xnn_f32_gemm_minmax_ukernel_3x8s4__sse()
118 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_3x8s4__sse()
142 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1, vb0123)); in xnn_f32_gemm_minmax_ukernel_3x8s4__sse()
154 vacc1x0123 = _mm_min_ps(vacc1x0123, vmax); in xnn_f32_gemm_minmax_ukernel_3x8s4__sse()
162 vacc1x0123 = _mm_max_ps(vacc1x0123, vmin); in xnn_f32_gemm_minmax_ukernel_3x8s4__sse()
172 _mm_storeu_ps(c1, vacc1x0123); in xnn_f32_gemm_minmax_ukernel_3x8s4__sse()
187 _mm_storeu_ps(c1, vacc1x0123); in xnn_f32_gemm_minmax_ukernel_3x8s4__sse()
[all …]
3x8-minmax-wasmsimd-arm-splat.c
58 v128_t vacc1x0123 = vacc0x0123; in xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_arm_splat() local
81 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1c0, vb0123c0)); in xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_arm_splat()
94 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1c1, vb0123c1)); in xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_arm_splat()
107 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1c2, vb0123c2)); in xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_arm_splat()
120 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1c3, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_arm_splat()
143 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123)); in xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_arm_splat()
154 vacc1x0123 = wasm_f32x4_max(vacc1x0123, vmin); in xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_arm_splat()
161 vacc1x0123 = wasm_f32x4_min(vacc1x0123, vmax); in xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_arm_splat()
171 wasm_v128_store(c1, vacc1x0123); in xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_arm_splat()
186 wasm_v128_store(c1, vacc1x0123); in xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_arm_splat()
[all …]
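
The f32 GEMM hits above are 3x8 tiles of the same computation: the dup/splat variants broadcast one A element against a 4-wide B column (va1c0 * vb0123c0, ...), the s4 variants rotate A lanes instead of re-broadcasting, and the wasmsimd-x86 variants clamp via wasm_v128_bitselect with an explicit compare where the others use min/max directly (same result for non-NaN inputs). A scalar reference with illustrative names:

#include <stddef.h>

/* Scalar reference for the minmax GEMM tile: C = clamp(A * B + bias). B is
 * assumed packed row-major by k, nr columns wide. */
static void f32_gemm_minmax_ref(size_t mr, size_t nr, size_t kc,
                                const float* a, const float* bias,
                                const float* b, float* c,
                                float vmin, float vmax) {
  for (size_t m = 0; m < mr; m++) {
    for (size_t n = 0; n < nr; n++) {
      float vacc = bias[n];              /* vacc1x0123 = vacc0x0123 (bias copy) */
      for (size_t k = 0; k < kc; k++) {
        vacc += a[m * kc + k] * b[k * nr + n];
      }
      vacc = vacc < vmin ? vmin : vacc;  /* wasm_f32x4_max / _mm_max_ps */
      vacc = vacc > vmax ? vmax : vacc;  /* wasm_f32x4_min / _mm_min_ps */
      c[m * nr + n] = vacc;
    }
  }
}
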
/external/XNNPACK/src/f32-gemm/gen-inc/
3x8s4inc-minmax-wasmsimd-x86.c
58 v128_t vacc1x0123 = wasm_v128_load(acc + 8); in xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86() local
78 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c0)); in xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86()
92 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c1)); in xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86()
106 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c2)); in xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86()
120 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86()
144 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123)); in xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86()
156 vacc1x0123 = wasm_v128_bitselect(vmin, vacc1x0123, wasm_f32x4_lt(vacc1x0123, vmin)); in xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86()
164 vacc1x0123 = wasm_v128_bitselect(vacc1x0123, vmax, wasm_f32x4_le(vacc1x0123, vmax)); in xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86()
174 wasm_v128_store(c1, vacc1x0123); in xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86()
189 wasm_v128_store(c1, vacc1x0123); in xnn_f32_gemminc_minmax_ukernel_3x8s4__wasmsimd_x86()
[all …]
3x8inc-minmax-wasmsimd-x86-splat.c
58 v128_t vacc1x0123 = wasm_v128_load(acc + 8); in xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat() local
81 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1c0, vb0123c0)); in xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat()
94 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1c1, vb0123c1)); in xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat()
107 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1c2, vb0123c2)); in xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat()
120 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1c3, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat()
143 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123)); in xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat()
155 vacc1x0123 = wasm_v128_bitselect(vmin, vacc1x0123, wasm_f32x4_lt(vacc1x0123, vmin)); in xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat()
163 vacc1x0123 = wasm_v128_bitselect(vacc1x0123, vmax, wasm_f32x4_le(vacc1x0123, vmax)); in xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat()
173 wasm_v128_store(c1, vacc1x0123); in xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat()
188 wasm_v128_store(c1, vacc1x0123); in xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_x86_splat()
[all …]
3x8s4inc-minmax-sse.c
58 __m128 vacc1x0123 = _mm_load_ps(acc + 8); in xnn_f32_gemminc_minmax_ukernel_3x8s4__sse() local
78 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1, vb0123c0)); in xnn_f32_gemminc_minmax_ukernel_3x8s4__sse()
92 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1, vb0123c1)); in xnn_f32_gemminc_minmax_ukernel_3x8s4__sse()
106 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1, vb0123c2)); in xnn_f32_gemminc_minmax_ukernel_3x8s4__sse()
120 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_3x8s4__sse()
144 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1, vb0123)); in xnn_f32_gemminc_minmax_ukernel_3x8s4__sse()
156 vacc1x0123 = _mm_min_ps(vacc1x0123, vmax); in xnn_f32_gemminc_minmax_ukernel_3x8s4__sse()
164 vacc1x0123 = _mm_max_ps(vacc1x0123, vmin); in xnn_f32_gemminc_minmax_ukernel_3x8s4__sse()
174 _mm_storeu_ps(c1, vacc1x0123); in xnn_f32_gemminc_minmax_ukernel_3x8s4__sse()
189 _mm_storeu_ps(c1, vacc1x0123); in xnn_f32_gemminc_minmax_ukernel_3x8s4__sse()
[all …]
3x8inc-minmax-wasmsimd-arm-splat.c
60 v128_t vacc1x0123 = wasm_v128_load(acc + 8); in xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat() local
83 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1c0, vb0123c0)); in xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat()
96 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1c1, vb0123c1)); in xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat()
109 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1c2, vb0123c2)); in xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat()
122 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1c3, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat()
145 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123)); in xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat()
156 vacc1x0123 = wasm_f32x4_max(vacc1x0123, vmin); in xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat()
163 vacc1x0123 = wasm_f32x4_min(vacc1x0123, vmax); in xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat()
173 wasm_v128_store(c1, vacc1x0123); in xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat()
188 wasm_v128_store(c1, vacc1x0123); in xnn_f32_gemminc_minmax_ukernel_3x8__wasmsimd_arm_splat()
[all …]
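
The gemminc variants above differ from the plain GEMM kernels in initialization only: accumulators resume from a caller-provided partial-sum buffer rather than the bias (vacc1x0123 = wasm_v128_load(acc + 8)), which lets a matmul split along K accumulate across passes. A sketch of that initialization, names illustrative:

#include <stddef.h>

/* gemminc initialization: copy partial sums into the accumulator tile
 * instead of broadcasting the bias. */
static void f32_gemminc_init_ref(size_t mr, size_t nr, const float* acc,
                                 float* vacc) {
  for (size_t m = 0; m < mr; m++) {
    for (size_t n = 0; n < nr; n++) {
      vacc[m * nr + n] = acc[m * nr + n];  /* resume, rather than load bias */
    }
  }
}
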
/external/XNNPACK/src/f32-igemm/gen/
3x8-minmax-wasmsimd-x86-splat.c
56 v128_t vacc1x0123 = vacc0x0123; in xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat() local
98 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1c0, vb0123c0)); in xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat()
111 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1c1, vb0123c1)); in xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat()
124 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1c2, vb0123c2)); in xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat()
137 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1c3, vb0123c3)); in xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat()
161 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123)); in xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat()
173 vacc1x0123 = wasm_v128_bitselect(vmin, vacc1x0123, wasm_f32x4_lt(vacc1x0123, vmin)); in xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat()
181 vacc1x0123 = wasm_v128_bitselect(vacc1x0123, vmax, wasm_f32x4_le(vacc1x0123, vmax)); in xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat()
191 wasm_v128_store(c1, vacc1x0123); in xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat()
203 wasm_v128_store(c1, vacc1x0123); in xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat()
[all …]
3x8s4-minmax-wasmsimd-x86.c
56 v128_t vacc1x0123 = vacc0x0123; in xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86() local
95 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c0)); in xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86()
109 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c1)); in xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86()
123 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c2)); in xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86()
137 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c3)); in xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86()
162 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123)); in xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86()
174 vacc1x0123 = wasm_v128_bitselect(vmin, vacc1x0123, wasm_f32x4_lt(vacc1x0123, vmin)); in xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86()
182 vacc1x0123 = wasm_v128_bitselect(vacc1x0123, vmax, wasm_f32x4_le(vacc1x0123, vmax)); in xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86()
192 wasm_v128_store(c1, vacc1x0123); in xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86()
204 wasm_v128_store(c1, vacc1x0123); in xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86()
[all …]
3x8s4-minmax-wasmsimd-arm.c
58 v128_t vacc1x0123 = vacc0x0123; in xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm() local
97 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c0)); in xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm()
111 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c1)); in xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm()
125 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c2)); in xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm()
139 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c3)); in xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm()
164 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123)); in xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm()
175 vacc1x0123 = wasm_f32x4_max(vacc1x0123, vmin); in xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm()
182 vacc1x0123 = wasm_f32x4_min(vacc1x0123, vmax); in xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm()
192 wasm_v128_store(c1, vacc1x0123); in xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm()
204 wasm_v128_store(c1, vacc1x0123); in xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm()
[all …]
3x8s4-minmax-sse.c
56 __m128 vacc1x0123 = vacc0x0123; in xnn_f32_igemm_minmax_ukernel_3x8s4__sse() local
95 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1, vb0123c0)); in xnn_f32_igemm_minmax_ukernel_3x8s4__sse()
109 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1, vb0123c1)); in xnn_f32_igemm_minmax_ukernel_3x8s4__sse()
123 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1, vb0123c2)); in xnn_f32_igemm_minmax_ukernel_3x8s4__sse()
137 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1, vb0123c3)); in xnn_f32_igemm_minmax_ukernel_3x8s4__sse()
162 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1, vb0123)); in xnn_f32_igemm_minmax_ukernel_3x8s4__sse()
174 vacc1x0123 = _mm_min_ps(vacc1x0123, vmax); in xnn_f32_igemm_minmax_ukernel_3x8s4__sse()
182 vacc1x0123 = _mm_max_ps(vacc1x0123, vmin); in xnn_f32_igemm_minmax_ukernel_3x8s4__sse()
192 _mm_storeu_ps(c1, vacc1x0123); in xnn_f32_igemm_minmax_ukernel_3x8s4__sse()
204 _mm_storeu_ps(c1, vacc1x0123); in xnn_f32_igemm_minmax_ukernel_3x8s4__sse()
[all …]
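
As in the qs8 case, the f32 igemm hits wrap the same accumulate-and-clamp body in an indirection loop over A row pointers. An illustrative sketch for one output row:

#include <stddef.h>

/* One row of an indirect f32 minmax GEMM: sum over kernel taps through a
 * pointer table, then clamp. Names are illustrative. */
static void f32_igemm_row_ref(const float** indirection, size_t ks, size_t kc,
                              size_t nr, const float* bias, const float* w,
                              float* c, float vmin, float vmax) {
  for (size_t n = 0; n < nr; n++) {
    float vacc = bias[n];
    for (size_t p = 0; p < ks; p++) {
      const float* a = indirection[p];
      for (size_t k = 0; k < kc; k++) {
        vacc += a[k] * w[(p * kc + k) * nr + n];
      }
    }
    vacc = vacc < vmin ? vmin : vacc;
    vacc = vacc > vmax ? vmax : vacc;
    c[n] = vacc;
  }
}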
