
Searched refs:vacc0x3 (Results 1 – 25 of 413) sorted by relevance


/external/XNNPACK/src/bf16-gemm/gen/
1x4c8-minmax-neonbf16-bfmlal.c:47 …float32x4_t vacc0x3 = vcvt_f32_bf16(vld1_lane_bf16(w, vreinterpret_bf16_u16(vdup_n_u16(0)), 0)); w… in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonbf16_bfmlal() local
61 vacc0x3 = vbfmlalbq_f32(vacc0x3, va0, vb3); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonbf16_bfmlal()
66 vacc0x3 = vbfmlaltq_f32(vacc0x3, va0, vb3); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonbf16_bfmlal()
91 vacc0x3 = vbfmlalbq_f32(vacc0x3, va0x3, vb3); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonbf16_bfmlal()
92 vacc0x3 = vbfmlaltq_f32(vacc0x3, va0x3, vb3); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonbf16_bfmlal()
97 const float32x4_t vacc0x23 = vpaddq_f32(vacc0x2, vacc0x3); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonbf16_bfmlal()
104 const float32x2_t vsum0x3 = vadd_f32(vget_low_f32(vacc0x3), vget_high_f32(vacc0x3)); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonbf16_bfmlal()
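The vbfmlalbq_f32 / vbfmlaltq_f32 pair above widens the even (bottom) and odd (top) bf16 lanes, multiplies them, and adds the products into a float32x4_t accumulator; the bias is loaded as a single bf16 lane and widened with vcvt_f32_bf16, and the accumulator is reduced with vpaddq_f32 / vadd_f32 at the end. A minimal sketch of the same accumulate-and-reduce pattern, assuming arm_neon.h on a core with the BF16 extension (the function name and arguments are hypothetical, not XNNPACK code):

#include <arm_neon.h>
#include <stddef.h>

// Dot product of n bf16 values (n a multiple of 8), accumulated in f32.
float bf16_dot_bfmlal(const bfloat16_t* a, const bfloat16_t* b, size_t n) {
  float32x4_t acc = vdupq_n_f32(0.0f);
  for (size_t i = 0; i < n; i += 8) {
    const bfloat16x8_t va = vld1q_bf16(a + i);
    const bfloat16x8_t vb = vld1q_bf16(b + i);
    acc = vbfmlalbq_f32(acc, va, vb);  // even lanes: acc += a[even] * b[even]
    acc = vbfmlaltq_f32(acc, va, vb);  // odd lanes:  acc += a[odd]  * b[odd]
  }
  // Horizontal reduction of the four partial sums, as the kernels above do
  // with vpaddq_f32 / vadd_f32 before storing c0.
  const float32x2_t sum2 = vadd_f32(vget_low_f32(acc), vget_high_f32(acc));
  return vget_lane_f32(vpadd_f32(sum2, sum2), 0);
}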
1x4c8-minmax-neonfma-zip.c:48 …float32x4_t vacc0x3 = vreinterpretq_f32_u32(vshll_n_u16(vld1_lane_u16(w, vdup_n_u16(0), 0), 16)); … in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_zip() local
69 vacc0x3 = vfmaq_f32(vacc0x3, va0e, vb3e); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_zip()
81 vacc0x3 = vfmaq_f32(vacc0x3, va0o, vb3o); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_zip()
114 vacc0x3 = vfmaq_f32(vacc0x3, va0x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_zip()
129 vacc0x3 = vfmaq_f32(vacc0x3, va0x3o, vb3o); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_zip()
134 const float32x4_t vacc0x23 = vpaddq_f32(vacc0x2, vacc0x3); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_zip()
141 const float32x2_t vsum0x3 = vadd_f32(vget_low_f32(vacc0x3), vget_high_f32(vacc0x3)); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_zip()
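The neonfma variants cannot use BF16 arithmetic directly, so they exploit the fact that a bf16 value is the upper 16 bits of an IEEE binary32: vshll_n_u16(..., 16) shifts the raw bf16 bits into the high half of each 32-bit lane and vreinterpretq_f32_u32 turns the result into an ordinary float32x4_t that plain vfmaq_f32 can accumulate (the zip and shland suffixes only differ in how the even and odd halves of a uint16x8_t load are separated). A standalone sketch of that widening step, with a hypothetical helper name:

#include <arm_neon.h>
#include <stdint.h>

// Widen four bf16 values (stored as raw uint16_t) to a float32x4_t.
static inline float32x4_t bf16x4_to_f32x4(const uint16_t* p) {
  const uint16x4_t raw = vld1_u16(p);            // four bf16 bit patterns
  const uint32x4_t bits = vshll_n_u16(raw, 16);  // move them into the high half
  return vreinterpretq_f32_u32(bits);            // reinterpret as f32 lanes
}

The widened operands then feed regular vfmaq_f32 accumulations, which is what the vacc0x3 updates above are doing for the even and odd halves.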
2x4c8-minmax-neonbf16-bfmlal.c:53 …float32x4_t vacc0x3 = vcvt_f32_bf16(vld1_lane_bf16(w, vreinterpret_bf16_u16(vdup_n_u16(0)), 0)); w… in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonbf16_bfmlal() local
57 float32x4_t vacc1x3 = vacc0x3; in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonbf16_bfmlal()
75 vacc0x3 = vbfmlalbq_f32(vacc0x3, va0, vb3); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonbf16_bfmlal()
84 vacc0x3 = vbfmlaltq_f32(vacc0x3, va0, vb3); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonbf16_bfmlal()
120 vacc0x3 = vbfmlalbq_f32(vacc0x3, va0x3, vb3); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonbf16_bfmlal()
121 vacc0x3 = vbfmlaltq_f32(vacc0x3, va0x3, vb3); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonbf16_bfmlal()
130 const float32x4_t vacc0x23 = vpaddq_f32(vacc0x2, vacc0x3); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonbf16_bfmlal()
142 const float32x2_t vsum0x3 = vadd_f32(vget_low_f32(vacc0x3), vget_high_f32(vacc0x3)); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonbf16_bfmlal()
1x4c8-minmax-neonfma-shland.c:48 …float32x4_t vacc0x3 = vreinterpretq_f32_u32(vshll_n_u16(vld1_lane_u16(w, vdup_n_u16(0), 0), 16)); … in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_shland() local
69 vacc0x3 = vfmaq_f32(vacc0x3, va0e, vb3e); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_shland()
81 vacc0x3 = vfmaq_f32(vacc0x3, va0o, vb3o); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_shland()
114 vacc0x3 = vfmaq_f32(vacc0x3, va0x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_shland()
129 vacc0x3 = vfmaq_f32(vacc0x3, va0x3o, vb3o); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_shland()
134 const float32x4_t vacc0x23 = vpaddq_f32(vacc0x2, vacc0x3); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_shland()
141 const float32x2_t vsum0x3 = vadd_f32(vget_low_f32(vacc0x3), vget_high_f32(vacc0x3)); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonfma_shland()
1x4c8-minmax-neonbf16-bfdot.c:47 …float32x4_t vacc0x3 = vcvt_f32_bf16(vld1_lane_bf16(w, vreinterpret_bf16_u16(vdup_n_u16(0)), 0)); w… in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonbf16_bfdot() local
61 vacc0x3 = vbfdotq_f32(vacc0x3, va0, vb3); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonbf16_bfdot()
83 vacc0x3 = vbfdotq_f32(vacc0x3, va0x3, vb3); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonbf16_bfdot()
88 const float32x4_t vacc0x23 = vpaddq_f32(vacc0x2, vacc0x3); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonbf16_bfdot()
95 const float32x2_t vsum0x3 = vadd_f32(vget_low_f32(vacc0x3), vget_high_f32(vacc0x3)); in xnn_bf16_gemm_minmax_ukernel_1x4c8__neonbf16_bfdot()
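vbfdotq_f32 folds the bfmlalb/bfmlalt pair into one instruction: each f32 lane of the accumulator receives the sum of products of one adjacent pair of bf16 lanes. A sketch under the same BF16-extension assumption as above (hypothetical function name):

#include <arm_neon.h>
#include <stddef.h>

// Same bf16 dot product, but each BFDOT accumulates
// a[2i]*b[2i] + a[2i+1]*b[2i+1] into lane i of the f32 accumulator.
float bf16_dot_bfdot(const bfloat16_t* a, const bfloat16_t* b, size_t n) {
  float32x4_t acc = vdupq_n_f32(0.0f);
  for (size_t i = 0; i < n; i += 8) {
    acc = vbfdotq_f32(acc, vld1q_bf16(a + i), vld1q_bf16(b + i));
  }
  const float32x2_t sum2 = vadd_f32(vget_low_f32(acc), vget_high_f32(acc));
  return vget_lane_f32(vpadd_f32(sum2, sum2), 0);
}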
3x4c8-minmax-neonbf16-bfmlal.c:59 …float32x4_t vacc0x3 = vcvt_f32_bf16(vld1_lane_bf16(w, vreinterpret_bf16_u16(vdup_n_u16(0)), 0)); w… in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfmlal() local
63 float32x4_t vacc1x3 = vacc0x3; in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfmlal()
67 float32x4_t vacc2x3 = vacc0x3; in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfmlal()
89 vacc0x3 = vbfmlalbq_f32(vacc0x3, va0, vb3); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfmlal()
102 vacc0x3 = vbfmlaltq_f32(vacc0x3, va0, vb3); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfmlal()
149 vacc0x3 = vbfmlalbq_f32(vacc0x3, va0x3, vb3); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfmlal()
150 vacc0x3 = vbfmlaltq_f32(vacc0x3, va0x3, vb3); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfmlal()
163 const float32x4_t vacc0x23 = vpaddq_f32(vacc0x2, vacc0x3); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfmlal()
180 const float32x2_t vsum0x3 = vadd_f32(vget_low_f32(vacc0x3), vget_high_f32(vacc0x3)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfmlal()
2x4c8-minmax-neonfma-shland.c:54 …float32x4_t vacc0x3 = vreinterpretq_f32_u32(vshll_n_u16(vld1_lane_u16(w, vdup_n_u16(0), 0), 16)); … in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_shland() local
58 float32x4_t vacc1x3 = vacc0x3; in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_shland()
84 vacc0x3 = vfmaq_f32(vacc0x3, va0e, vb3e); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_shland()
101 vacc0x3 = vfmaq_f32(vacc0x3, va0o, vb3o); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_shland()
147 vacc0x3 = vfmaq_f32(vacc0x3, va0x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_shland()
170 vacc0x3 = vfmaq_f32(vacc0x3, va0x3o, vb3o); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_shland()
177 const float32x4_t vacc0x23 = vpaddq_f32(vacc0x2, vacc0x3); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_shland()
189 const float32x2_t vsum0x3 = vadd_f32(vget_low_f32(vacc0x3), vget_high_f32(vacc0x3)); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_shland()
2x4c8-minmax-neonfma-zip.c:54 …float32x4_t vacc0x3 = vreinterpretq_f32_u32(vshll_n_u16(vld1_lane_u16(w, vdup_n_u16(0), 0), 16)); … in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_zip() local
58 float32x4_t vacc1x3 = vacc0x3; in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_zip()
84 vacc0x3 = vfmaq_f32(vacc0x3, va0e, vb3e); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_zip()
101 vacc0x3 = vfmaq_f32(vacc0x3, va0o, vb3o); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_zip()
147 vacc0x3 = vfmaq_f32(vacc0x3, va0x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_zip()
170 vacc0x3 = vfmaq_f32(vacc0x3, va0x3o, vb3o); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_zip()
177 const float32x4_t vacc0x23 = vpaddq_f32(vacc0x2, vacc0x3); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_zip()
189 const float32x2_t vsum0x3 = vadd_f32(vget_low_f32(vacc0x3), vget_high_f32(vacc0x3)); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonfma_zip()
4x4c8-minmax-neonbf16-bfmlal.c:65 …float32x4_t vacc0x3 = vcvt_f32_bf16(vld1_lane_bf16(w, vreinterpret_bf16_u16(vdup_n_u16(0)), 0)); w… in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonbf16_bfmlal() local
69 float32x4_t vacc1x3 = vacc0x3; in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonbf16_bfmlal()
73 float32x4_t vacc2x3 = vacc0x3; in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonbf16_bfmlal()
77 float32x4_t vacc3x3 = vacc0x3; in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonbf16_bfmlal()
103 vacc0x3 = vbfmlalbq_f32(vacc0x3, va0, vb3); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonbf16_bfmlal()
120 vacc0x3 = vbfmlaltq_f32(vacc0x3, va0, vb3); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonbf16_bfmlal()
178 vacc0x3 = vbfmlalbq_f32(vacc0x3, va0x3, vb3); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonbf16_bfmlal()
179 vacc0x3 = vbfmlaltq_f32(vacc0x3, va0x3, vb3); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonbf16_bfmlal()
196 const float32x4_t vacc0x23 = vpaddq_f32(vacc0x2, vacc0x3); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonbf16_bfmlal()
218 const float32x2_t vsum0x3 = vadd_f32(vget_low_f32(vacc0x3), vget_high_f32(vacc0x3)); in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonbf16_bfmlal()
2x4c8-minmax-neonbf16-bfdot.c:53 …float32x4_t vacc0x3 = vcvt_f32_bf16(vld1_lane_bf16(w, vreinterpret_bf16_u16(vdup_n_u16(0)), 0)); w… in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonbf16_bfdot() local
57 float32x4_t vacc1x3 = vacc0x3; in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonbf16_bfdot()
75 vacc0x3 = vbfdotq_f32(vacc0x3, va0, vb3); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonbf16_bfdot()
105 vacc0x3 = vbfdotq_f32(vacc0x3, va0x3, vb3); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonbf16_bfdot()
113 const float32x4_t vacc0x23 = vpaddq_f32(vacc0x2, vacc0x3); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonbf16_bfdot()
125 const float32x2_t vsum0x3 = vadd_f32(vget_low_f32(vacc0x3), vget_high_f32(vacc0x3)); in xnn_bf16_gemm_minmax_ukernel_2x4c8__neonbf16_bfdot()
3x4c8-minmax-neonfma-shland.c:60 …float32x4_t vacc0x3 = vreinterpretq_f32_u32(vshll_n_u16(vld1_lane_u16(w, vdup_n_u16(0), 0), 16)); … in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland() local
64 float32x4_t vacc1x3 = vacc0x3; in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland()
68 float32x4_t vacc2x3 = vacc0x3; in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland()
99 vacc0x3 = vfmaq_f32(vacc0x3, va0e, vb3e); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland()
121 vacc0x3 = vfmaq_f32(vacc0x3, va0o, vb3o); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland()
180 vacc0x3 = vfmaq_f32(vacc0x3, va0x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland()
211 vacc0x3 = vfmaq_f32(vacc0x3, va0x3o, vb3o); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland()
220 const float32x4_t vacc0x23 = vpaddq_f32(vacc0x2, vacc0x3); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland()
237 const float32x2_t vsum0x3 = vadd_f32(vget_low_f32(vacc0x3), vget_high_f32(vacc0x3)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland()
3x4c8-minmax-neonfma-zip.c:60 …float32x4_t vacc0x3 = vreinterpretq_f32_u32(vshll_n_u16(vld1_lane_u16(w, vdup_n_u16(0), 0), 16)); … in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip() local
64 float32x4_t vacc1x3 = vacc0x3; in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip()
68 float32x4_t vacc2x3 = vacc0x3; in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip()
99 vacc0x3 = vfmaq_f32(vacc0x3, va0e, vb3e); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip()
121 vacc0x3 = vfmaq_f32(vacc0x3, va0o, vb3o); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip()
180 vacc0x3 = vfmaq_f32(vacc0x3, va0x3e, vb3e); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip()
211 vacc0x3 = vfmaq_f32(vacc0x3, va0x3o, vb3o); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip()
220 const float32x4_t vacc0x23 = vpaddq_f32(vacc0x2, vacc0x3); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip()
237 const float32x2_t vsum0x3 = vadd_f32(vget_low_f32(vacc0x3), vget_high_f32(vacc0x3)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip()
5x4c8-minmax-neonbf16-bfmlal.c:71 …float32x4_t vacc0x3 = vcvt_f32_bf16(vld1_lane_bf16(w, vreinterpret_bf16_u16(vdup_n_u16(0)), 0)); w… in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonbf16_bfmlal() local
75 float32x4_t vacc1x3 = vacc0x3; in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonbf16_bfmlal()
79 float32x4_t vacc2x3 = vacc0x3; in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonbf16_bfmlal()
83 float32x4_t vacc3x3 = vacc0x3; in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonbf16_bfmlal()
87 float32x4_t vacc4x3 = vacc0x3; in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonbf16_bfmlal()
117 vacc0x3 = vbfmlalbq_f32(vacc0x3, va0, vb3); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonbf16_bfmlal()
138 vacc0x3 = vbfmlaltq_f32(vacc0x3, va0, vb3); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonbf16_bfmlal()
207 vacc0x3 = vbfmlalbq_f32(vacc0x3, va0x3, vb3); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonbf16_bfmlal()
208 vacc0x3 = vbfmlaltq_f32(vacc0x3, va0x3, vb3); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonbf16_bfmlal()
229 const float32x4_t vacc0x23 = vpaddq_f32(vacc0x2, vacc0x3); in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonbf16_bfmlal()
[all …]
3x4c8-minmax-neonbf16-bfdot.c:59 …float32x4_t vacc0x3 = vcvt_f32_bf16(vld1_lane_bf16(w, vreinterpret_bf16_u16(vdup_n_u16(0)), 0)); w… in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfdot() local
63 float32x4_t vacc1x3 = vacc0x3; in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfdot()
67 float32x4_t vacc2x3 = vacc0x3; in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfdot()
89 vacc0x3 = vbfdotq_f32(vacc0x3, va0, vb3); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfdot()
127 vacc0x3 = vbfdotq_f32(vacc0x3, va0x3, vb3); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfdot()
138 const float32x4_t vacc0x23 = vpaddq_f32(vacc0x2, vacc0x3); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfdot()
155 const float32x2_t vsum0x3 = vadd_f32(vget_low_f32(vacc0x3), vget_high_f32(vacc0x3)); in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfdot()
/external/XNNPACK/src/f32-ppmm/gen/
2x4-minmax-scalar.c:43 float vacc0x3 = w[3]; in xnn_f32_ppmm_minmax_ukernel_2x4__scalar() local
47 float vacc1x3 = vacc0x3; in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()
68 vacc0x3 += va0 * vb3; in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()
81 vacc0x3 = math_min_f32(vacc0x3, vmax); in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()
91 vacc0x3 = math_max_f32(vacc0x3, vmin); in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()
102 c0[3] = vacc0x3; in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()
4x4-minmax-scalar.c:51 float vacc0x3 = w[3]; in xnn_f32_ppmm_minmax_ukernel_4x4__scalar() local
55 float vacc1x3 = vacc0x3; in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
59 float vacc2x3 = vacc0x3; in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
63 float vacc3x3 = vacc0x3; in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
92 vacc0x3 += va0 * vb3; in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
113 vacc0x3 = math_min_f32(vacc0x3, vmax); in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
131 vacc0x3 = math_max_f32(vacc0x3, vmin); in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
152 c0[3] = vacc0x3; in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
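In the scalar f32-ppmm kernels vacc0x3 is an ordinary float: it starts from the packed bias w[3] (and seeds vacc1x3, vacc2x3, vacc3x3 for the other rows), accumulates va0 * vb3 over the K dimension, and is clamped with math_min_f32 / math_max_f32 before the store to c0[3]. A reduced 1x4 sketch of that flow with hypothetical names:

#include <stddef.h>
#include <math.h>

// One output row, four columns: bias init, multiply-accumulate over k,
// then clamp to [vmin, vmax] as the minmax kernels do.
static void ppmm_1x4_scalar(const float* a, const float* w, float* c,
                            size_t k, float vmin, float vmax) {
  float acc0 = w[0], acc1 = w[1], acc2 = w[2], acc3 = w[3];
  w += 4;
  for (size_t i = 0; i < k; i++) {
    const float va = a[i];
    acc0 += va * w[0];
    acc1 += va * w[1];
    acc2 += va * w[2];
    acc3 += va * w[3];
    w += 4;
  }
  c[0] = fminf(fmaxf(acc0, vmin), vmax);
  c[1] = fminf(fmaxf(acc1, vmin), vmax);
  c[2] = fminf(fmaxf(acc2, vmin), vmax);
  c[3] = fminf(fmaxf(acc3, vmin), vmax);
}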
/external/XNNPACK/src/f32-vmulcaddc/gen/
c4-minmax-scalar-2x.c:57 float vacc0x3 = i0[3]; in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x() local
73 vacc0x3 = vacc0x3 * vscale3 + vbias3; in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x()
82 vacc0x3 = math_max_f32(vacc0x3, vmin); in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x()
91 vacc0x3 = math_min_f32(vacc0x3, vmax); in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x()
100 o0[3] = vacc0x3; in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x()
c4-minmax-wasm-2x.c:57 float vacc0x3 = i0[3]; in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x() local
73 vacc0x3 = vacc0x3 * vscale3 + vbias3; in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x()
82 vacc0x3 = __builtin_wasm_max_f32(vacc0x3, vmin); in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x()
91 vacc0x3 = __builtin_wasm_min_f32(vacc0x3, vmax); in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x()
100 o0[3] = vacc0x3; in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x()
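The vmulcaddc kernels are element-wise: vacc0x3 starts as the input element i0[3], becomes vacc0x3 * vscale3 + vbias3, and is clamped from below with vmin and from above with vmax (math_max_f32/math_min_f32 in the scalar variant, __builtin_wasm_max_f32/__builtin_wasm_min_f32 in the wasm one) before being written to o0[3]. A minimal per-channel sketch with hypothetical names:

#include <stddef.h>
#include <math.h>

// y[c] = clamp(x[c] * scale[c] + bias[c], vmin, vmax) for each channel,
// the same per-element operation the c4 kernels apply four at a time.
static void vmulcaddc_scalar(const float* x, const float* scale,
                             const float* bias, float* y,
                             size_t channels, float vmin, float vmax) {
  for (size_t c = 0; c < channels; c++) {
    float acc = x[c] * scale[c] + bias[c];
    acc = fmaxf(acc, vmin);  // lower clamp
    acc = fminf(acc, vmax);  // upper clamp
    y[c] = acc;
  }
}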
/external/XNNPACK/src/f32-spmm/gen/
8x4-minmax-scalar.c:65 float vacc0x3 = *w++; in xnn_f32_spmm_minmax_ukernel_8x4__scalar() local
66 float vacc1x3 = vacc0x3; in xnn_f32_spmm_minmax_ukernel_8x4__scalar()
67 float vacc2x3 = vacc0x3; in xnn_f32_spmm_minmax_ukernel_8x4__scalar()
68 float vacc3x3 = vacc0x3; in xnn_f32_spmm_minmax_ukernel_8x4__scalar()
69 float vacc4x3 = vacc0x3; in xnn_f32_spmm_minmax_ukernel_8x4__scalar()
70 float vacc5x3 = vacc0x3; in xnn_f32_spmm_minmax_ukernel_8x4__scalar()
71 float vacc6x3 = vacc0x3; in xnn_f32_spmm_minmax_ukernel_8x4__scalar()
72 float vacc7x3 = vacc0x3; in xnn_f32_spmm_minmax_ukernel_8x4__scalar()
113 vacc0x3 += vi0 * vw3; in xnn_f32_spmm_minmax_ukernel_8x4__scalar()
147 float vout0x3 = math_min_f32(vacc0x3, vmax); in xnn_f32_spmm_minmax_ukernel_8x4__scalar()
[all …]
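In the sparse f32-spmm kernel the packed weight stream starts with a per-output-channel bias, so vacc0x3 = *w++ and the copies vacc1x3..vacc7x3 all begin from that same value; each nonzero weight vw3 is then multiplied against the gathered inputs vi0..vi7, and the results are clamped before the store. A simplified one-output sketch (the idx array is a stand-in for XNNPACK's difference-encoded index map; names are hypothetical):

#include <stddef.h>
#include <math.h>

// One output value of a sparse dot product: bias plus the nonzero weights
// times the input elements they pair with, clamped to [vmin, vmax].
static float spmm_one_output(const float* input, const float* w,
                             const size_t* idx, size_t nnz,
                             float vmin, float vmax) {
  float acc = *w++;                      // packed stream starts with the bias
  for (size_t j = 0; j < nnz; j++) {
    acc += input[idx[j]] * (*w++);       // nonzero weight * gathered input
  }
  return fminf(fmaxf(acc, vmin), vmax);  // minmax clamp before the store
}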
/external/XNNPACK/src/qs8-gemm/gen/
1x4c8-xw-minmax-fp32-sse2.c:48 __m128i vacc0x3 = _mm_cvtsi32_si128(((const int*) w)[3]); in xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__sse2() local
68 vacc0x3 = _mm_add_epi32(vacc0x3, _mm_madd_epi16(vxa0, vxb3)); in xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__sse2()
75 … vacc0x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x1, vacc0x3), _mm_unpackhi_epi32(vacc0x1, vacc0x… in xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__sse2()
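In the SSE2 qs8 path each vacc0xN starts as a single 32-bit bias lane (_mm_cvtsi32_si128), and _mm_madd_epi16 does the inner work: it multiplies eight sign-extended 16-bit values pairwise and adds adjacent products into four 32-bit lanes, which _mm_add_epi32 folds into the accumulator; the unpacklo/unpackhi line then merges the per-column partial sums. A small sketch of those two steps (hypothetical helper names):

#include <emmintrin.h>

// One K-step: lane i of _mm_madd_epi16(vxa, vxb) is
// vxa[2i]*vxb[2i] + vxa[2i+1]*vxb[2i+1]; add it into the column accumulator.
static __m128i madd_step(__m128i acc, __m128i vxa, __m128i vxb) {
  return _mm_add_epi32(acc, _mm_madd_epi16(vxa, vxb));
}

// End-of-kernel reduction for two columns, as in the vacc0x13 line above:
// interleave the two accumulators and add, leaving partial sums for
// columns 1 and 3 ready for one more pairwise add.
static __m128i combine_cols_1_3(__m128i vacc0x1, __m128i vacc0x3) {
  return _mm_add_epi32(_mm_unpacklo_epi32(vacc0x1, vacc0x3),
                       _mm_unpackhi_epi32(vacc0x1, vacc0x3));
}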
1x4c8-minmax-fp32-wasmsimd-dot16x2-ld128.c:47 v128_t vacc0x3 = wasm_v128_load32_zero((const int32_t*) w + 3); in xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128() local
66 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_dot_i16x8(vxa0, vxb3)); in xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128()
73 …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc0x1, in xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128()
1x4c8-minmax-fp32-wasmsimd-dot16x2-ld64.c:47 v128_t vacc0x3 = wasm_v128_load32_zero((const int32_t*) w + 3); in xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64() local
66 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_dot_i16x8(vxa0, vxb3)); in xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64()
73 …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc0x1, in xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64()
1x4c8-xw-minmax-fp32-wasmsimd-dot16x2.c:47 v128_t vacc0x3 = wasm_v128_load32_zero((const int32_t*) w + 3); in xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2() local
66 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_dot_i16x8(vxa0, vxb3)); in xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2()
73 …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc0x1, in xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2()
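The WAsm SIMD dot16x2 kernels have the same structure expressed with WebAssembly intrinsics: wasm_v128_load32_zero puts one int32 bias into lane 0 of a zeroed vector, wasm_i32x4_dot_i16x8 forms the pairwise 16x16-to-32 products, and wasm_i32x4_add accumulates them. A minimal sketch, assuming a toolchain with wasm_simd128.h and SIMD enabled (names are hypothetical):

#include <wasm_simd128.h>
#include <stddef.h>
#include <stdint.h>

// Accumulate k8 chunks of eight sign-extended 16-bit products into four
// int32 lanes, starting from a single bias value in lane 0.
static v128_t dot16x2_accumulate(const int32_t* bias,
                                 const int16_t* xa, const int16_t* xb,
                                 size_t k8) {
  v128_t acc = wasm_v128_load32_zero(bias);
  for (size_t i = 0; i < k8; i++) {
    const v128_t vxa = wasm_v128_load(xa + i * 8);
    const v128_t vxb = wasm_v128_load(xb + i * 8);
    acc = wasm_i32x4_add(acc, wasm_i32x4_dot_i16x8(vxa, vxb));
  }
  return acc;
}

The qc8 results below show the identical pattern; only the per-channel quantization parameters applied after accumulation differ.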
/external/XNNPACK/src/qc8-gemm/gen/
1x4c8-minmax-fp32-wasmsimd-dot16x2-ld64.c:47 v128_t vacc0x3 = wasm_v128_load32_zero((const int32_t*) w + 3); in xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64() local
66 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_dot_i16x8(vxa0, vxb3)); in xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64()
73 …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc0x1, in xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64()
1x4c8-minmax-fp32-wasmsimd-dot16x2-ld128.c:47 v128_t vacc0x3 = wasm_v128_load32_zero((const int32_t*) w + 3); in xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128() local
66 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_dot_i16x8(vxa0, vxb3)); in xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128()
73 …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc0x1, in xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128()
