Home
last modified time | relevance | path

Searched refs: vacc3x01234567 (Results 1 – 25 of 274) sorted by relevance

1 2 3 4 5 6 7 8 9 10 11

/external/XNNPACK/src/f16-gemm/gen/
D4x8-minmax-neonfp16arith-ld64.c66 float16x8_t vacc3x01234567 = vacc0x01234567; in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64() local
81 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c0, va3, 0); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
91 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
99 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c1, va3, 1); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
109 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c1, vb01234567c1); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
117 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
127 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
135 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c3, va3, 3); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
145 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
162 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3, vb01234567); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
[all …]
D4x16-minmax-neonfp16arith-ld64.c69 float16x8_t vacc3x01234567 = vacc0x01234567; in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64() local
86 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c0, va3, 0); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
100 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
113 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c1, va3, 1); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
127 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c1, vb01234567c1); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
140 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
154 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
167 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c3, va3, 3); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
181 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
203 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3, vb01234567); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
[all …]
D6x8-minmax-neonfp16arith-ld64.c78 float16x8_t vacc3x01234567 = vacc0x01234567; in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64() local
97 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c0, va3, 0); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
111 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
121 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c1, va3, 1); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
135 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c1, vb01234567c1); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
145 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
159 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
169 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c3, va3, 3); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
183 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
204 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3, vb01234567); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
[all …]
D8x8-minmax-neonfp16arith-ld64.c90 float16x8_t vacc3x01234567 = vacc0x01234567; in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64() local
113 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c0, va3, 0); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
131 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
143 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c1, va3, 1); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
161 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c1, vb01234567c1); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
173 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
191 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
203 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c3, va3, 3); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
221 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
246 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3, vb01234567); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
[all …]
D6x16-minmax-neonfp16arith-ld64.c81 float16x8_t vacc3x01234567 = vacc0x01234567; in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64() local
104 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c0, va3, 0); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
124 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
141 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c1, va3, 1); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
161 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c1, vb01234567c1); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
178 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
198 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
215 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c3, va3, 3); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
235 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
263 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3, vb01234567); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
[all …]
/external/XNNPACK/src/f16-igemm/gen/
D4x8-minmax-neonfp16arith-ld64.c62 float16x8_t vacc3x01234567 = vacc0x01234567; in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64() local
101 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c0, va3, 0); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
111 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
119 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c1, va3, 1); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
129 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c1, vb01234567c1); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
137 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
147 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
155 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c3, va3, 3); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
165 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c3, vb01234567c3); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
180 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3, vb01234567); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
[all …]
D4x16-minmax-neonfp16arith-ld64.c65 float16x8_t vacc3x01234567 = vacc0x01234567; in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64() local
106 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c0, va3, 0); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
120 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
133 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c1, va3, 1); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
147 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c1, vb01234567c1); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
160 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
174 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
187 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c3, va3, 3); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
201 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c3, vb01234567c3); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
221 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3, vb01234567); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
[all …]
D6x8-minmax-neonfp16arith-ld64.c70 float16x8_t vacc3x01234567 = vacc0x01234567; in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64() local
123 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c0, va3, 0); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
137 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
147 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c1, va3, 1); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
161 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c1, vb01234567c1); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
171 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
185 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
195 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c3, va3, 3); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
209 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c3, vb01234567c3); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
228 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3, vb01234567); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
[all …]
D8x8-minmax-neonfp16arith-ld64.c78 float16x8_t vacc3x01234567 = vacc0x01234567; in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64() local
145 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c0, va3, 0); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
163 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
175 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c1, va3, 1); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
193 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c1, vb01234567c1); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
205 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
223 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
235 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c3, va3, 3); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
253 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c3, vb01234567c3); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
276 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3, vb01234567); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
[all …]
D6x16-minmax-neonfp16arith-ld64.c73 float16x8_t vacc3x01234567 = vacc0x01234567; in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64() local
130 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c0, va3, 0); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
150 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
167 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c1, va3, 1); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
187 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c1, vb01234567c1); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
204 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
224 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
241 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c3, va3, 3); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
261 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c3, vb01234567c3); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
287 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3, vb01234567); in xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64()
[all …]
/external/XNNPACK/src/f16-gemm/gen-inc/
D4x8inc-minmax-neonfp16arith-ld64.c68 …float16x8_t vacc3x01234567 = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64() local
83 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c0, va3, 0); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
93 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
101 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c1, va3, 1); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
111 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c1, vb01234567c1); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
119 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
129 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
137 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c3, va3, 3); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
147 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
164 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3, vb01234567); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
[all …]
D4x16inc-minmax-neonfp16arith-ld64.c71 …float16x8_t vacc3x01234567 = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64() local
88 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c0, va3, 0); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
102 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
115 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c1, va3, 1); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
129 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c1, vb01234567c1); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
142 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
156 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
169 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c3, va3, 3); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
183 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
205 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3, vb01234567); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
[all …]
D6x8inc-minmax-neonfp16arith-ld64.c80 …float16x8_t vacc3x01234567 = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64() local
99 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c0, va3, 0); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
113 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
123 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c1, va3, 1); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
137 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c1, vb01234567c1); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
147 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
161 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
171 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c3, va3, 3); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
185 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
206 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3, vb01234567); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
[all …]
D8x8inc-minmax-neonfp16arith-ld64.c92 …float16x8_t vacc3x01234567 = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() local
115 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c0, va3, 0); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
133 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
145 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c1, va3, 1); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
163 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c1, vb01234567c1); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
175 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
193 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
205 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c3, va3, 3); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
223 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
248 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3, vb01234567); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
[all …]
D6x16inc-minmax-neonfp16arith-ld64.c83 …float16x8_t vacc3x01234567 = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64() local
106 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c0, va3, 0); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
126 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
143 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c1, va3, 1); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
163 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c1, vb01234567c1); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
180 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c2, va3, 2); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
200 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
217 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c3, va3, 3); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
237 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
265 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3, vb01234567); in xnn_f16_gemminc_minmax_ukernel_6x16__neonfp16arith_ld64()
[all …]
/external/XNNPACK/src/f32-gemm/gen-inc/
D4x16s4inc-minmax-fma3-broadcast.c68 __m256 vacc3x01234567 = _mm256_load_ps(acc + 48); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast() local
90 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567c0, vacc3x01234567); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
107 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567c1, vacc3x01234567); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
124 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567c2, vacc3x01234567); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
141 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567c3, vacc3x01234567); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
169vacc3x01234567 = _mm256_fmadd_ps(_mm256_and_ps(va3, _mm256_cmp_ps(vb01234567c0, vzero, _CMP_NEQ_OQ… in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
186vacc3x01234567 = _mm256_fmadd_ps(_mm256_and_ps(va3, _mm256_cmp_ps(vb01234567c1, vzero, _CMP_NEQ_OQ… in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
203vacc3x01234567 = _mm256_fmadd_ps(_mm256_and_ps(va3, _mm256_cmp_ps(vb01234567c2, vzero, _CMP_NEQ_OQ… in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
220vacc3x01234567 = _mm256_fmadd_ps(_mm256_and_ps(va3, _mm256_cmp_ps(vb01234567c3, vzero, _CMP_NEQ_OQ… in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
234 vacc3x01234567 = _mm256_max_ps(vacc3x01234567, vmin); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
[all …]
D4x8inc-minmax-fma3-broadcast.c65 __m256 vacc3x01234567 = _mm256_load_ps(acc + 24); in xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast() local
85 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567, vacc3x01234567); in xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast()
94 vacc3x01234567 = _mm256_max_ps(vacc3x01234567, vmin); in xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast()
100 vacc3x01234567 = _mm256_min_ps(vacc3x01234567, vmax); in xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast()
103 _mm256_storeu_ps(c3, vacc3x01234567); in xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast()
119 __m128 vacc3x0123 = _mm256_castps256_ps128(vacc3x01234567); in xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast()
129 vacc3x0123 = _mm256_extractf128_ps(vacc3x01234567, 1); in xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast()
D4x16inc-minmax-fma3-broadcast.c68 __m256 vacc3x01234567 = _mm256_load_ps(acc + 48); in xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast() local
90 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567, vacc3x01234567); in xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast()
103 vacc3x01234567 = _mm256_max_ps(vacc3x01234567, vmin); in xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast()
113 vacc3x01234567 = _mm256_min_ps(vacc3x01234567, vmax); in xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast()
120 _mm256_storeu_ps(c3, vacc3x01234567); in xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast()
141 _mm256_storeu_ps(c3, vacc3x01234567); in xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast()
146 vacc3x01234567 = vacc3x89ABCDEF; in xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast()
156 __m128 vacc3x0123 = _mm256_castps256_ps128(vacc3x01234567); in xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast()
166 vacc3x0123 = _mm256_extractf128_ps(vacc3x01234567, 1); in xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast()
D4x16inc-minmax-avx-broadcast.c68 __m256 vacc3x01234567 = _mm256_load_ps(acc + 48); in xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast() local
90 vacc3x01234567 = _mm256_add_ps(vacc3x01234567, _mm256_mul_ps(va3, vb01234567)); in xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast()
103 vacc3x01234567 = _mm256_max_ps(vacc3x01234567, vmin); in xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast()
113 vacc3x01234567 = _mm256_min_ps(vacc3x01234567, vmax); in xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast()
120 _mm256_storeu_ps(c3, vacc3x01234567); in xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast()
141 _mm256_storeu_ps(c3, vacc3x01234567); in xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast()
146 vacc3x01234567 = vacc3x89ABCDEF; in xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast()
156 __m128 vacc3x0123 = _mm256_castps256_ps128(vacc3x01234567); in xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast()
166 vacc3x0123 = _mm256_extractf128_ps(vacc3x01234567, 1); in xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast()
/external/XNNPACK/src/f32-igemm/gen/
D4x16s4-minmax-fma3-broadcast.c64 __m256 vacc3x01234567 = vacc0x01234567; in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast() local
110 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567c0, vacc3x01234567); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
127 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567c1, vacc3x01234567); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
144 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567c2, vacc3x01234567); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
161 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567c3, vacc3x01234567); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
189vacc3x01234567 = _mm256_fmadd_ps(_mm256_and_ps(va3, _mm256_cmp_ps(vb01234567c0, vzero, _CMP_NEQ_OQ… in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
206vacc3x01234567 = _mm256_fmadd_ps(_mm256_and_ps(va3, _mm256_cmp_ps(vb01234567c1, vzero, _CMP_NEQ_OQ… in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
223vacc3x01234567 = _mm256_fmadd_ps(_mm256_and_ps(va3, _mm256_cmp_ps(vb01234567c2, vzero, _CMP_NEQ_OQ… in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
240vacc3x01234567 = _mm256_fmadd_ps(_mm256_and_ps(va3, _mm256_cmp_ps(vb01234567c3, vzero, _CMP_NEQ_OQ… in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
256 vacc3x01234567 = _mm256_max_ps(vacc3x01234567, vmin); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
[all …]
/external/XNNPACK/src/f32-gemm/gen/
D4x16s4-minmax-fma3-broadcast.c66 __m256 vacc3x01234567 = vacc0x01234567; in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast() local
88 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567c0, vacc3x01234567); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
105 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567c1, vacc3x01234567); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
122 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567c2, vacc3x01234567); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
139 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567c3, vacc3x01234567); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
167vacc3x01234567 = _mm256_fmadd_ps(_mm256_and_ps(va3, _mm256_cmp_ps(vb01234567c0, vzero, _CMP_NEQ_OQ… in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
184vacc3x01234567 = _mm256_fmadd_ps(_mm256_and_ps(va3, _mm256_cmp_ps(vb01234567c1, vzero, _CMP_NEQ_OQ… in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
201vacc3x01234567 = _mm256_fmadd_ps(_mm256_and_ps(va3, _mm256_cmp_ps(vb01234567c2, vzero, _CMP_NEQ_OQ… in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
218vacc3x01234567 = _mm256_fmadd_ps(_mm256_and_ps(va3, _mm256_cmp_ps(vb01234567c3, vzero, _CMP_NEQ_OQ… in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
232 vacc3x01234567 = _mm256_max_ps(vacc3x01234567, vmin); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
[all …]
D4x8-minmax-avx-broadcast.c63 __m256 vacc3x01234567 = vacc0x01234567; in xnn_f32_gemm_minmax_ukernel_4x8__avx_broadcast() local
83 vacc3x01234567 = _mm256_add_ps(vacc3x01234567, _mm256_mul_ps(va3, vb01234567)); in xnn_f32_gemm_minmax_ukernel_4x8__avx_broadcast()
92 vacc3x01234567 = _mm256_max_ps(vacc3x01234567, vmin); in xnn_f32_gemm_minmax_ukernel_4x8__avx_broadcast()
98 vacc3x01234567 = _mm256_min_ps(vacc3x01234567, vmax); in xnn_f32_gemm_minmax_ukernel_4x8__avx_broadcast()
101 _mm256_storeu_ps(c3, vacc3x01234567); in xnn_f32_gemm_minmax_ukernel_4x8__avx_broadcast()
117 __m128 vacc3x0123 = _mm256_castps256_ps128(vacc3x01234567); in xnn_f32_gemm_minmax_ukernel_4x8__avx_broadcast()
127 vacc3x0123 = _mm256_extractf128_ps(vacc3x01234567, 1); in xnn_f32_gemm_minmax_ukernel_4x8__avx_broadcast()
D4x8-minmax-fma3-broadcast.c63 __m256 vacc3x01234567 = vacc0x01234567; in xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast() local
83 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567, vacc3x01234567); in xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast()
92 vacc3x01234567 = _mm256_max_ps(vacc3x01234567, vmin); in xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast()
98 vacc3x01234567 = _mm256_min_ps(vacc3x01234567, vmax); in xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast()
101 _mm256_storeu_ps(c3, vacc3x01234567); in xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast()
117 __m128 vacc3x0123 = _mm256_castps256_ps128(vacc3x01234567); in xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast()
127 vacc3x0123 = _mm256_extractf128_ps(vacc3x01234567, 1); in xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast()
D4x16-minmax-fma3-broadcast.c66 __m256 vacc3x01234567 = vacc0x01234567; in xnn_f32_gemm_minmax_ukernel_4x16__fma3_broadcast() local
88 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567, vacc3x01234567); in xnn_f32_gemm_minmax_ukernel_4x16__fma3_broadcast()
101 vacc3x01234567 = _mm256_max_ps(vacc3x01234567, vmin); in xnn_f32_gemm_minmax_ukernel_4x16__fma3_broadcast()
111 vacc3x01234567 = _mm256_min_ps(vacc3x01234567, vmax); in xnn_f32_gemm_minmax_ukernel_4x16__fma3_broadcast()
118 _mm256_storeu_ps(c3, vacc3x01234567); in xnn_f32_gemm_minmax_ukernel_4x16__fma3_broadcast()
139 _mm256_storeu_ps(c3, vacc3x01234567); in xnn_f32_gemm_minmax_ukernel_4x16__fma3_broadcast()
144 vacc3x01234567 = vacc3x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_4x16__fma3_broadcast()
154 __m128 vacc3x0123 = _mm256_castps256_ps128(vacc3x01234567); in xnn_f32_gemm_minmax_ukernel_4x16__fma3_broadcast()
164 vacc3x0123 = _mm256_extractf128_ps(vacc3x01234567, 1); in xnn_f32_gemm_minmax_ukernel_4x16__fma3_broadcast()
D4x16-minmax-avx-broadcast.c66 __m256 vacc3x01234567 = vacc0x01234567; in xnn_f32_gemm_minmax_ukernel_4x16__avx_broadcast() local
88 vacc3x01234567 = _mm256_add_ps(vacc3x01234567, _mm256_mul_ps(va3, vb01234567)); in xnn_f32_gemm_minmax_ukernel_4x16__avx_broadcast()
101 vacc3x01234567 = _mm256_max_ps(vacc3x01234567, vmin); in xnn_f32_gemm_minmax_ukernel_4x16__avx_broadcast()
111 vacc3x01234567 = _mm256_min_ps(vacc3x01234567, vmax); in xnn_f32_gemm_minmax_ukernel_4x16__avx_broadcast()
118 _mm256_storeu_ps(c3, vacc3x01234567); in xnn_f32_gemm_minmax_ukernel_4x16__avx_broadcast()
139 _mm256_storeu_ps(c3, vacc3x01234567); in xnn_f32_gemm_minmax_ukernel_4x16__avx_broadcast()
144 vacc3x01234567 = vacc3x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_4x16__avx_broadcast()
154 __m128 vacc3x0123 = _mm256_castps256_ps128(vacc3x01234567); in xnn_f32_gemm_minmax_ukernel_4x16__avx_broadcast()
164 vacc3x0123 = _mm256_extractf128_ps(vacc3x01234567, 1); in xnn_f32_gemm_minmax_ukernel_4x16__avx_broadcast()

1 2 3 4 5 6 7 8 9 10 11