Home
last modified time | relevance | path

Searched refs:vacc2x01234567 (Results 1 – 25 of 147) sorted by relevance

123456

/external/XNNPACK/src/f16-gemm/gen-inc/
D4x8inc-minmax-neonfp16arith-ld64.c67 …float16x8_t vacc2x01234567 = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64() local
82 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c0, va2, 0); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
92 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
100 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c1, va2, 1); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
110 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c1, vb01234567c1); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
118 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
128 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
136 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c3, va2, 3); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
146 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
163 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2, vb01234567); in xnn_f16_gemminc_minmax_ukernel_4x8__neonfp16arith_ld64()
[all …]
D4x16inc-minmax-neonfp16arith-ld64.c69 …float16x8_t vacc2x01234567 = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64() local
87 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c0, va2, 0); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
101 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
114 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c1, va2, 1); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
128 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c1, vb01234567c1); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
141 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
155 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
168 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c3, va2, 3); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
182 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
204 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2, vb01234567); in xnn_f16_gemminc_minmax_ukernel_4x16__neonfp16arith_ld64()
[all …]
D6x8inc-minmax-neonfp16arith-ld64.c79 …float16x8_t vacc2x01234567 = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64() local
98 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c0, va2, 0); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
112 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
122 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c1, va2, 1); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
136 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c1, vb01234567c1); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
146 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
160 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
170 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c3, va2, 3); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
184 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
205 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2, vb01234567); in xnn_f16_gemminc_minmax_ukernel_6x8__neonfp16arith_ld64()
[all …]
D8x8inc-minmax-neonfp16arith-ld64.c91 …float16x8_t vacc2x01234567 = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() local
114 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c0, va2, 0); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
132 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
144 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c1, va2, 1); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
162 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c1, vb01234567c1); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
174 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
192 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
204 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c3, va2, 3); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
222 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
247 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2, vb01234567); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64()
[all …]
/external/XNNPACK/src/f16-gemm/gen/
D4x8-minmax-neonfp16arith-ld64.c65 float16x8_t vacc2x01234567 = vacc0x01234567; in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64() local
80 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c0, va2, 0); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
90 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
98 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c1, va2, 1); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
108 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c1, vb01234567c1); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
116 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
126 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
134 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c3, va2, 3); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
144 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
161 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2, vb01234567); in xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64()
[all …]
D4x16-minmax-neonfp16arith-ld64.c67 float16x8_t vacc2x01234567 = vacc0x01234567; in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64() local
85 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c0, va2, 0); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
99 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
112 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c1, va2, 1); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
126 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c1, vb01234567c1); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
139 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
153 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
166 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c3, va2, 3); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
180 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
202 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2, vb01234567); in xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64()
[all …]
D6x8-minmax-neonfp16arith-ld64.c77 float16x8_t vacc2x01234567 = vacc0x01234567; in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64() local
96 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c0, va2, 0); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
110 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
120 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c1, va2, 1); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
134 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c1, vb01234567c1); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
144 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
158 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
168 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c3, va2, 3); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
182 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
203 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2, vb01234567); in xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64()
[all …]
D8x8-minmax-neonfp16arith-ld64.c89 float16x8_t vacc2x01234567 = vacc0x01234567; in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64() local
112 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c0, va2, 0); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
130 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
142 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c1, va2, 1); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
160 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c1, vb01234567c1); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
172 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
190 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
202 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c3, va2, 3); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
220 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
245 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2, vb01234567); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64()
[all …]
D6x16-minmax-neonfp16arith-ld64.c79 float16x8_t vacc2x01234567 = vacc0x01234567; in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64() local
103 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c0, va2, 0); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
123 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
140 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c1, va2, 1); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
160 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c1, vb01234567c1); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
177 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
197 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
214 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c3, va2, 3); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
234 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
262 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2, vb01234567); in xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64()
[all …]
/external/XNNPACK/src/f16-igemm/gen/
D4x8-minmax-neonfp16arith-ld64.c61 float16x8_t vacc2x01234567 = vacc0x01234567; in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64() local
100 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c0, va2, 0); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
110 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
118 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c1, va2, 1); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
128 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c1, vb01234567c1); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
136 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
146 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
154 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c3, va2, 3); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
164 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c3, vb01234567c3); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
179 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2, vb01234567); in xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64()
[all …]
D4x16-minmax-neonfp16arith-ld64.c63 float16x8_t vacc2x01234567 = vacc0x01234567; in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64() local
105 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c0, va2, 0); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
119 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
132 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c1, va2, 1); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
146 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c1, vb01234567c1); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
159 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
173 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
186 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c3, va2, 3); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
200 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c3, vb01234567c3); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
220 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2, vb01234567); in xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64()
[all …]
D6x8-minmax-neonfp16arith-ld64.c69 float16x8_t vacc2x01234567 = vacc0x01234567; in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64() local
122 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c0, va2, 0); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
136 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
146 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c1, va2, 1); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
160 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c1, vb01234567c1); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
170 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
184 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
194 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c3, va2, 3); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
208 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c3, vb01234567c3); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
227 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2, vb01234567); in xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64()
[all …]
D8x8-minmax-neonfp16arith-ld64.c77 float16x8_t vacc2x01234567 = vacc0x01234567; in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64() local
144 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c0, va2, 0); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
162 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
174 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c1, va2, 1); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
192 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c1, vb01234567c1); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
204 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c2, va2, 2); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
222 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
234 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c3, va2, 3); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
252 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c3, vb01234567c3); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
275 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2, vb01234567); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64()
[all …]
/external/XNNPACK/src/f32-igemm/gen/
D3x16s4-minmax-fma3-broadcast.c58 __m256 vacc2x01234567 = vacc0x01234567; in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast() local
96 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c0, vacc2x01234567); in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast()
110 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c1, vacc2x01234567); in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast()
124 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c2, vacc2x01234567); in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast()
138 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c3, vacc2x01234567); in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast()
162 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567, vacc2x01234567); in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast()
176 vacc2x01234567 = _mm256_min_ps(vacc2x01234567, vmax); in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast()
184 vacc2x01234567 = _mm256_max_ps(vacc2x01234567, vmin); in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast()
190 _mm256_storeu_ps(c2, vacc2x01234567); in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast()
204 _mm256_storeu_ps(c2, vacc2x01234567); in xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast()
[all …]
D4x16s4-minmax-fma3-broadcast.c62 __m256 vacc2x01234567 = vacc0x01234567; in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast() local
109 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c0, vacc2x01234567); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
126 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c1, vacc2x01234567); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
143 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c2, vacc2x01234567); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
160 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c3, vacc2x01234567); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
188 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567, vacc2x01234567); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
204 vacc2x01234567 = _mm256_min_ps(vacc2x01234567, vmax); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
214 vacc2x01234567 = _mm256_max_ps(vacc2x01234567, vmin); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
225 _mm256_storeu_ps(c2, vacc2x01234567); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
240 _mm256_storeu_ps(c2, vacc2x01234567); in xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast()
[all …]
D3x16-minmax-fma3-broadcast.c58 __m256 vacc2x01234567 = vacc0x01234567; in xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast() local
98 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567, vacc2x01234567); in xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast()
108 vacc2x01234567 = _mm256_min_ps(vacc2x01234567, vmax); in xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast()
116 vacc2x01234567 = _mm256_max_ps(vacc2x01234567, vmin); in xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast()
122 _mm256_storeu_ps(c2, vacc2x01234567); in xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast()
136 _mm256_storeu_ps(c2, vacc2x01234567); in xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast()
140 vacc2x01234567 = vacc2x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast()
148 __m128 vacc2x0123 = _mm256_castps256_ps128(vacc2x01234567); in xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast()
156 vacc2x0123 = _mm256_extractf128_ps(vacc2x01234567, 1); in xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast()
D3x16-minmax-avx-broadcast.c58 __m256 vacc2x01234567 = vacc0x01234567; in xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast() local
98 vacc2x01234567 = _mm256_add_ps(vacc2x01234567, _mm256_mul_ps(va2, vb01234567)); in xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast()
108 vacc2x01234567 = _mm256_min_ps(vacc2x01234567, vmax); in xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast()
116 vacc2x01234567 = _mm256_max_ps(vacc2x01234567, vmin); in xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast()
122 _mm256_storeu_ps(c2, vacc2x01234567); in xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast()
136 _mm256_storeu_ps(c2, vacc2x01234567); in xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast()
140 vacc2x01234567 = vacc2x89ABCDEF; in xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast()
148 __m128 vacc2x0123 = _mm256_castps256_ps128(vacc2x01234567); in xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast()
156 vacc2x0123 = _mm256_extractf128_ps(vacc2x01234567, 1); in xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast()
/external/XNNPACK/src/f32-gemm/gen-inc/
D3x16s4inc-minmax-fma3-broadcast.c60 __m256 vacc2x01234567 = _mm256_load_ps(acc + 32); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast() local
79 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c0, vacc2x01234567); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast()
93 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c1, vacc2x01234567); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast()
107 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c2, vacc2x01234567); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast()
121 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c3, vacc2x01234567); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast()
145 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567, vacc2x01234567); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast()
157 vacc2x01234567 = _mm256_min_ps(vacc2x01234567, vmax); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast()
165 vacc2x01234567 = _mm256_max_ps(vacc2x01234567, vmin); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast()
171 _mm256_storeu_ps(c2, vacc2x01234567); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast()
188 _mm256_storeu_ps(c2, vacc2x01234567); in xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast()
[all …]
D4x16s4inc-minmax-fma3-broadcast.c66 __m256 vacc2x01234567 = _mm256_load_ps(acc + 32); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast() local
89 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c0, vacc2x01234567); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
106 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c1, vacc2x01234567); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
123 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c2, vacc2x01234567); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
140 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c3, vacc2x01234567); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
168 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567, vacc2x01234567); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
182 vacc2x01234567 = _mm256_min_ps(vacc2x01234567, vmax); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
192 vacc2x01234567 = _mm256_max_ps(vacc2x01234567, vmin); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
203 _mm256_storeu_ps(c2, vacc2x01234567); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
222 _mm256_storeu_ps(c2, vacc2x01234567); in xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast()
[all …]
D3x16inc-minmax-fma3-broadcast.c60 __m256 vacc2x01234567 = _mm256_load_ps(acc + 32); in xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast() local
79 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567, vacc2x01234567); in xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast()
90 vacc2x01234567 = _mm256_min_ps(vacc2x01234567, vmax); in xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast()
98 vacc2x01234567 = _mm256_max_ps(vacc2x01234567, vmin); in xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast()
104 _mm256_storeu_ps(c2, vacc2x01234567); in xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast()
121 _mm256_storeu_ps(c2, vacc2x01234567); in xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast()
125 vacc2x01234567 = vacc2x89ABCDEF; in xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast()
133 __m128 vacc2x0123 = _mm256_castps256_ps128(vacc2x01234567); in xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast()
141 vacc2x0123 = _mm256_extractf128_ps(vacc2x01234567, 1); in xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast()
D3x16inc-minmax-avx-broadcast.c60 __m256 vacc2x01234567 = _mm256_load_ps(acc + 32); in xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast() local
79 vacc2x01234567 = _mm256_add_ps(vacc2x01234567, _mm256_mul_ps(va2, vb01234567)); in xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast()
90 vacc2x01234567 = _mm256_min_ps(vacc2x01234567, vmax); in xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast()
98 vacc2x01234567 = _mm256_max_ps(vacc2x01234567, vmin); in xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast()
104 _mm256_storeu_ps(c2, vacc2x01234567); in xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast()
121 _mm256_storeu_ps(c2, vacc2x01234567); in xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast()
125 vacc2x01234567 = vacc2x89ABCDEF; in xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast()
133 __m128 vacc2x0123 = _mm256_castps256_ps128(vacc2x01234567); in xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast()
141 vacc2x0123 = _mm256_extractf128_ps(vacc2x01234567, 1); in xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast()
/external/XNNPACK/src/f32-gemm/gen/
D3x16s4-minmax-fma3-broadcast.c58 __m256 vacc2x01234567 = vacc0x01234567; in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast() local
77 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c0, vacc2x01234567); in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast()
91 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c1, vacc2x01234567); in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast()
105 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c2, vacc2x01234567); in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast()
119 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c3, vacc2x01234567); in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast()
143 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567, vacc2x01234567); in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast()
155 vacc2x01234567 = _mm256_min_ps(vacc2x01234567, vmax); in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast()
163 vacc2x01234567 = _mm256_max_ps(vacc2x01234567, vmin); in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast()
169 _mm256_storeu_ps(c2, vacc2x01234567); in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast()
186 _mm256_storeu_ps(c2, vacc2x01234567); in xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast()
[all …]
D4x16s4-minmax-fma3-broadcast.c64 __m256 vacc2x01234567 = vacc0x01234567; in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast() local
87 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c0, vacc2x01234567); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
104 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c1, vacc2x01234567); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
121 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c2, vacc2x01234567); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
138 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c3, vacc2x01234567); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
166 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567, vacc2x01234567); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
180 vacc2x01234567 = _mm256_min_ps(vacc2x01234567, vmax); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
190 vacc2x01234567 = _mm256_max_ps(vacc2x01234567, vmin); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
201 _mm256_storeu_ps(c2, vacc2x01234567); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
220 _mm256_storeu_ps(c2, vacc2x01234567); in xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast()
[all …]
D3x16-minmax-avx-broadcast.c58 __m256 vacc2x01234567 = vacc0x01234567; in xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast() local
77 vacc2x01234567 = _mm256_add_ps(vacc2x01234567, _mm256_mul_ps(va2, vb01234567)); in xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast()
88 vacc2x01234567 = _mm256_min_ps(vacc2x01234567, vmax); in xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast()
96 vacc2x01234567 = _mm256_max_ps(vacc2x01234567, vmin); in xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast()
102 _mm256_storeu_ps(c2, vacc2x01234567); in xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast()
119 _mm256_storeu_ps(c2, vacc2x01234567); in xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast()
123 vacc2x01234567 = vacc2x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast()
131 __m128 vacc2x0123 = _mm256_castps256_ps128(vacc2x01234567); in xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast()
139 vacc2x0123 = _mm256_extractf128_ps(vacc2x01234567, 1); in xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast()
D3x16-minmax-fma3-broadcast.c58 __m256 vacc2x01234567 = vacc0x01234567; in xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast() local
77 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567, vacc2x01234567); in xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast()
88 vacc2x01234567 = _mm256_min_ps(vacc2x01234567, vmax); in xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast()
96 vacc2x01234567 = _mm256_max_ps(vacc2x01234567, vmin); in xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast()
102 _mm256_storeu_ps(c2, vacc2x01234567); in xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast()
119 _mm256_storeu_ps(c2, vacc2x01234567); in xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast()
123 vacc2x01234567 = vacc2x89ABCDEF; in xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast()
131 __m128 vacc2x0123 = _mm256_castps256_ps128(vacc2x01234567); in xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast()
139 vacc2x0123 = _mm256_extractf128_ps(vacc2x01234567, 1); in xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast()

123456