Home
last modified time | relevance | path

Searched refs:vmlal_s8 (Results 1 – 25 of 68) sorted by relevance

123

/external/XNNPACK/src/qs8-igemm/gen/
D4x16c16-minmax-neon-mlal-padal.c173 vprod0x0 = vmlal_s8(vprod0x0, vget_high_s8(vb0), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
174 vprod1x0 = vmlal_s8(vprod1x0, vget_high_s8(vb0), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
175 vprod2x0 = vmlal_s8(vprod2x0, vget_high_s8(vb0), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
176 vprod3x0 = vmlal_s8(vprod3x0, vget_high_s8(vb0), vget_high_s8(va3)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
185 vprod0x1 = vmlal_s8(vprod0x1, vget_high_s8(vb1), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
186 vprod1x1 = vmlal_s8(vprod1x1, vget_high_s8(vb1), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
187 vprod2x1 = vmlal_s8(vprod2x1, vget_high_s8(vb1), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
188 vprod3x1 = vmlal_s8(vprod3x1, vget_high_s8(vb1), vget_high_s8(va3)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
197 vprod0x2 = vmlal_s8(vprod0x2, vget_high_s8(vb2), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
198 vprod1x2 = vmlal_s8(vprod1x2, vget_high_s8(vb2), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
[all …]
D3x16c16-minmax-neon-mlal-padal.c147 vprod0x0 = vmlal_s8(vprod0x0, vget_high_s8(vb0), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
148 vprod1x0 = vmlal_s8(vprod1x0, vget_high_s8(vb0), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
149 vprod2x0 = vmlal_s8(vprod2x0, vget_high_s8(vb0), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
156 vprod0x1 = vmlal_s8(vprod0x1, vget_high_s8(vb1), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
157 vprod1x1 = vmlal_s8(vprod1x1, vget_high_s8(vb1), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
158 vprod2x1 = vmlal_s8(vprod2x1, vget_high_s8(vb1), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
165 vprod0x2 = vmlal_s8(vprod0x2, vget_high_s8(vb2), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
166 vprod1x2 = vmlal_s8(vprod1x2, vget_high_s8(vb2), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
167 vprod2x2 = vmlal_s8(vprod2x2, vget_high_s8(vb2), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
174 vprod0x3 = vmlal_s8(vprod0x3, vget_high_s8(vb3), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
[all …]
D4x8c16-minmax-neon-mlal-padal.c133 vprod0x0 = vmlal_s8(vprod0x0, vget_high_s8(vb0), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
134 vprod1x0 = vmlal_s8(vprod1x0, vget_high_s8(vb0), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
135 vprod2x0 = vmlal_s8(vprod2x0, vget_high_s8(vb0), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
136 vprod3x0 = vmlal_s8(vprod3x0, vget_high_s8(vb0), vget_high_s8(va3)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
145 vprod0x1 = vmlal_s8(vprod0x1, vget_high_s8(vb1), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
146 vprod1x1 = vmlal_s8(vprod1x1, vget_high_s8(vb1), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
147 vprod2x1 = vmlal_s8(vprod2x1, vget_high_s8(vb1), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
148 vprod3x1 = vmlal_s8(vprod3x1, vget_high_s8(vb1), vget_high_s8(va3)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
157 vprod0x2 = vmlal_s8(vprod0x2, vget_high_s8(vb2), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
158 vprod1x2 = vmlal_s8(vprod1x2, vget_high_s8(vb2), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
[all …]
D3x8c16-minmax-neon-mlal-padal.c115 vprod0x0 = vmlal_s8(vprod0x0, vget_high_s8(vb0), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
116 vprod1x0 = vmlal_s8(vprod1x0, vget_high_s8(vb0), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
117 vprod2x0 = vmlal_s8(vprod2x0, vget_high_s8(vb0), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
124 vprod0x1 = vmlal_s8(vprod0x1, vget_high_s8(vb1), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
125 vprod1x1 = vmlal_s8(vprod1x1, vget_high_s8(vb1), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
126 vprod2x1 = vmlal_s8(vprod2x1, vget_high_s8(vb1), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
133 vprod0x2 = vmlal_s8(vprod0x2, vget_high_s8(vb2), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
134 vprod1x2 = vmlal_s8(vprod1x2, vget_high_s8(vb2), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
135 vprod2x2 = vmlal_s8(vprod2x2, vget_high_s8(vb2), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
142 vprod0x3 = vmlal_s8(vprod0x3, vget_high_s8(vb3), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
[all …]
D2x16c16-minmax-neon-mlal-padal.c121 vprod0x0 = vmlal_s8(vprod0x0, vget_high_s8(vb0), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
122 vprod1x0 = vmlal_s8(vprod1x0, vget_high_s8(vb0), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
127 vprod0x1 = vmlal_s8(vprod0x1, vget_high_s8(vb1), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
128 vprod1x1 = vmlal_s8(vprod1x1, vget_high_s8(vb1), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
133 vprod0x2 = vmlal_s8(vprod0x2, vget_high_s8(vb2), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
134 vprod1x2 = vmlal_s8(vprod1x2, vget_high_s8(vb2), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
139 vprod0x3 = vmlal_s8(vprod0x3, vget_high_s8(vb3), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
140 vprod1x3 = vmlal_s8(vprod1x3, vget_high_s8(vb3), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
145 vprod0x4 = vmlal_s8(vprod0x4, vget_high_s8(vb4), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
146 vprod1x4 = vmlal_s8(vprod1x4, vget_high_s8(vb4), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
[all …]
D4x16c2-minmax-neon-mlal-padal-dup.c130 …vprod0x0123c0 = vmlal_s8(vprod0x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
131 …vprod1x0123c0 = vmlal_s8(vprod1x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
132 …vprod2x0123c0 = vmlal_s8(vprod2x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
133 …vprod3x0123c0 = vmlal_s8(vprod3x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
143 …vprod0x4567c0 = vmlal_s8(vprod0x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
144 …vprod1x4567c0 = vmlal_s8(vprod1x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
145 …vprod2x4567c0 = vmlal_s8(vprod2x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
146 …vprod3x4567c0 = vmlal_s8(vprod3x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
156 …vprod0x89ABc0 = vmlal_s8(vprod0x89ABc0, vb89ABc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
157 …vprod1x89ABc0 = vmlal_s8(vprod1x89ABc0, vb89ABc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
[all …]
D4x16c8-minmax-neon-mlal-padal.c178 vprod0x0 = vmlal_s8(vprod0x0, vb0x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
179 vprod1x0 = vmlal_s8(vprod1x0, vb0x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
180 vprod2x0 = vmlal_s8(vprod2x0, vb0x1, va2x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
181 vprod3x0 = vmlal_s8(vprod3x0, vb0x1, va3x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
191 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
192 vprod1x1 = vmlal_s8(vprod1x1, vb1x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
193 vprod2x1 = vmlal_s8(vprod2x1, vb1x1, va2x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
194 vprod3x1 = vmlal_s8(vprod3x1, vb1x1, va3x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
204 vprod0x2 = vmlal_s8(vprod0x2, vb2x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
205 vprod1x2 = vmlal_s8(vprod1x2, vb2x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
[all …]
D3x16c2-minmax-neon-mlal-padal-dup.c115 …vprod0x0123c0 = vmlal_s8(vprod0x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
116 …vprod1x0123c0 = vmlal_s8(vprod1x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
117 …vprod2x0123c0 = vmlal_s8(vprod2x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
125 …vprod0x4567c0 = vmlal_s8(vprod0x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
126 …vprod1x4567c0 = vmlal_s8(vprod1x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
127 …vprod2x4567c0 = vmlal_s8(vprod2x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
135 …vprod0x89ABc0 = vmlal_s8(vprod0x89ABc0, vb89ABc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
136 …vprod1x89ABc0 = vmlal_s8(vprod1x89ABc0, vb89ABc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
137 …vprod2x89ABc0 = vmlal_s8(vprod2x89ABc0, vb89ABc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
145 …vprod0xCDEFc0 = vmlal_s8(vprod0xCDEFc0, vbCDEFc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
[all …]
D3x16c8-minmax-neon-mlal-padal.c151 vprod0x0 = vmlal_s8(vprod0x0, vb0x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
152 vprod1x0 = vmlal_s8(vprod1x0, vb0x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
153 vprod2x0 = vmlal_s8(vprod2x0, vb0x1, va2x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
161 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
162 vprod1x1 = vmlal_s8(vprod1x1, vb1x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
163 vprod2x1 = vmlal_s8(vprod2x1, vb1x1, va2x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
171 vprod0x2 = vmlal_s8(vprod0x2, vb2x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
172 vprod1x2 = vmlal_s8(vprod1x2, vb2x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
173 vprod2x2 = vmlal_s8(vprod2x2, vb2x1, va2x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
181 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
[all …]
D2x16c8-minmax-neon-mlal-padal.c124 vprod0x0 = vmlal_s8(vprod0x0, vb0x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
125 vprod1x0 = vmlal_s8(vprod1x0, vb0x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
131 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
132 vprod1x1 = vmlal_s8(vprod1x1, vb1x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
138 vprod0x2 = vmlal_s8(vprod0x2, vb2x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
139 vprod1x2 = vmlal_s8(vprod1x2, vb2x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
145 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
146 vprod1x3 = vmlal_s8(vprod1x3, vb3x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
152 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
153 vprod1x4 = vmlal_s8(vprod1x4, vb4x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
[all …]
D4x8c2-minmax-neon-mlal-padal-dup.c114 …vprod0x0123c0 = vmlal_s8(vprod0x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
115 …vprod1x0123c0 = vmlal_s8(vprod1x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
116 …vprod2x0123c0 = vmlal_s8(vprod2x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
117 …vprod3x0123c0 = vmlal_s8(vprod3x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
127 …vprod0x4567c0 = vmlal_s8(vprod0x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
128 …vprod1x4567c0 = vmlal_s8(vprod1x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
129 …vprod2x4567c0 = vmlal_s8(vprod2x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
130 …vprod3x4567c0 = vmlal_s8(vprod3x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
140 …vprod0x0123c1 = vmlal_s8(vprod0x0123c1, vb0123c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
141 …vprod1x0123c1 = vmlal_s8(vprod1x0123c1, vb0123c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
[all …]
D4x8c8-minmax-neon-mlal-padal.c138 vprod0x0 = vmlal_s8(vprod0x0, vb0x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
139 vprod1x0 = vmlal_s8(vprod1x0, vb0x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
140 vprod2x0 = vmlal_s8(vprod2x0, vb0x1, va2x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
141 vprod3x0 = vmlal_s8(vprod3x0, vb0x1, va3x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
151 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
152 vprod1x1 = vmlal_s8(vprod1x1, vb1x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
153 vprod2x1 = vmlal_s8(vprod2x1, vb1x1, va2x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
154 vprod3x1 = vmlal_s8(vprod3x1, vb1x1, va3x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
164 vprod0x2 = vmlal_s8(vprod0x2, vb2x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
165 vprod1x2 = vmlal_s8(vprod1x2, vb2x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
[all …]
/external/XNNPACK/src/qs8-gemm/gen/
D4x16c16-minmax-neon-mlal-padal.c156 vprod0x0 = vmlal_s8(vprod0x0, vget_high_s8(vb0), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
157 vprod1x0 = vmlal_s8(vprod1x0, vget_high_s8(vb0), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
158 vprod2x0 = vmlal_s8(vprod2x0, vget_high_s8(vb0), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
159 vprod3x0 = vmlal_s8(vprod3x0, vget_high_s8(vb0), vget_high_s8(va3)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
168 vprod0x1 = vmlal_s8(vprod0x1, vget_high_s8(vb1), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
169 vprod1x1 = vmlal_s8(vprod1x1, vget_high_s8(vb1), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
170 vprod2x1 = vmlal_s8(vprod2x1, vget_high_s8(vb1), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
171 vprod3x1 = vmlal_s8(vprod3x1, vget_high_s8(vb1), vget_high_s8(va3)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
180 vprod0x2 = vmlal_s8(vprod0x2, vget_high_s8(vb2), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
181 vprod1x2 = vmlal_s8(vprod1x2, vget_high_s8(vb2), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
[all …]
D3x16c16-minmax-neon-mlal-padal.c132 vprod0x0 = vmlal_s8(vprod0x0, vget_high_s8(vb0), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
133 vprod1x0 = vmlal_s8(vprod1x0, vget_high_s8(vb0), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
134 vprod2x0 = vmlal_s8(vprod2x0, vget_high_s8(vb0), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
141 vprod0x1 = vmlal_s8(vprod0x1, vget_high_s8(vb1), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
142 vprod1x1 = vmlal_s8(vprod1x1, vget_high_s8(vb1), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
143 vprod2x1 = vmlal_s8(vprod2x1, vget_high_s8(vb1), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
150 vprod0x2 = vmlal_s8(vprod0x2, vget_high_s8(vb2), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
151 vprod1x2 = vmlal_s8(vprod1x2, vget_high_s8(vb2), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
152 vprod2x2 = vmlal_s8(vprod2x2, vget_high_s8(vb2), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
159 vprod0x3 = vmlal_s8(vprod0x3, vget_high_s8(vb3), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
[all …]
D2x16c16-minmax-neon-mlal-padal.c108 vprod0x0 = vmlal_s8(vprod0x0, vget_high_s8(vb0), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
109 vprod1x0 = vmlal_s8(vprod1x0, vget_high_s8(vb0), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
114 vprod0x1 = vmlal_s8(vprod0x1, vget_high_s8(vb1), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
115 vprod1x1 = vmlal_s8(vprod1x1, vget_high_s8(vb1), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
120 vprod0x2 = vmlal_s8(vprod0x2, vget_high_s8(vb2), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
121 vprod1x2 = vmlal_s8(vprod1x2, vget_high_s8(vb2), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
126 vprod0x3 = vmlal_s8(vprod0x3, vget_high_s8(vb3), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
127 vprod1x3 = vmlal_s8(vprod1x3, vget_high_s8(vb3), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
132 vprod0x4 = vmlal_s8(vprod0x4, vget_high_s8(vb4), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
133 vprod1x4 = vmlal_s8(vprod1x4, vget_high_s8(vb4), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
[all …]
D4x8c16-minmax-neon-mlal-padal.c116 vprod0x0 = vmlal_s8(vprod0x0, vget_high_s8(vb0), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
117 vprod1x0 = vmlal_s8(vprod1x0, vget_high_s8(vb0), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
118 vprod2x0 = vmlal_s8(vprod2x0, vget_high_s8(vb0), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
119 vprod3x0 = vmlal_s8(vprod3x0, vget_high_s8(vb0), vget_high_s8(va3)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
128 vprod0x1 = vmlal_s8(vprod0x1, vget_high_s8(vb1), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
129 vprod1x1 = vmlal_s8(vprod1x1, vget_high_s8(vb1), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
130 vprod2x1 = vmlal_s8(vprod2x1, vget_high_s8(vb1), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
131 vprod3x1 = vmlal_s8(vprod3x1, vget_high_s8(vb1), vget_high_s8(va3)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
140 vprod0x2 = vmlal_s8(vprod0x2, vget_high_s8(vb2), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
141 vprod1x2 = vmlal_s8(vprod1x2, vget_high_s8(vb2), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
[all …]
D3x8c16-minmax-neon-mlal-padal.c100 vprod0x0 = vmlal_s8(vprod0x0, vget_high_s8(vb0), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
101 vprod1x0 = vmlal_s8(vprod1x0, vget_high_s8(vb0), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
102 vprod2x0 = vmlal_s8(vprod2x0, vget_high_s8(vb0), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
109 vprod0x1 = vmlal_s8(vprod0x1, vget_high_s8(vb1), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
110 vprod1x1 = vmlal_s8(vprod1x1, vget_high_s8(vb1), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
111 vprod2x1 = vmlal_s8(vprod2x1, vget_high_s8(vb1), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
118 vprod0x2 = vmlal_s8(vprod0x2, vget_high_s8(vb2), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
119 vprod1x2 = vmlal_s8(vprod1x2, vget_high_s8(vb2), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
120 vprod2x2 = vmlal_s8(vprod2x2, vget_high_s8(vb2), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
127 vprod0x3 = vmlal_s8(vprod0x3, vget_high_s8(vb3), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
[all …]
D3x16c8-minmax-neon-mlal-padal.c136 vprod0x0 = vmlal_s8(vprod0x0, vb0x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
137 vprod1x0 = vmlal_s8(vprod1x0, vb0x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
138 vprod2x0 = vmlal_s8(vprod2x0, vb0x1, va2x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
146 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
147 vprod1x1 = vmlal_s8(vprod1x1, vb1x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
148 vprod2x1 = vmlal_s8(vprod2x1, vb1x1, va2x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
156 vprod0x2 = vmlal_s8(vprod0x2, vb2x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
157 vprod1x2 = vmlal_s8(vprod1x2, vb2x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
158 vprod2x2 = vmlal_s8(vprod2x2, vb2x1, va2x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
166 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
[all …]
D4x16c2-minmax-neon-mlal-padal-dup.c113 …vprod0x0123c0 = vmlal_s8(vprod0x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
114 …vprod1x0123c0 = vmlal_s8(vprod1x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
115 …vprod2x0123c0 = vmlal_s8(vprod2x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
116 …vprod3x0123c0 = vmlal_s8(vprod3x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
126 …vprod0x4567c0 = vmlal_s8(vprod0x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
127 …vprod1x4567c0 = vmlal_s8(vprod1x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
128 …vprod2x4567c0 = vmlal_s8(vprod2x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
129 …vprod3x4567c0 = vmlal_s8(vprod3x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
139 …vprod0x89ABc0 = vmlal_s8(vprod0x89ABc0, vb89ABc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
140 …vprod1x89ABc0 = vmlal_s8(vprod1x89ABc0, vb89ABc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
[all …]
D4x16c8-minmax-neon-mlal-padal.c161 vprod0x0 = vmlal_s8(vprod0x0, vb0x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
162 vprod1x0 = vmlal_s8(vprod1x0, vb0x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
163 vprod2x0 = vmlal_s8(vprod2x0, vb0x1, va2x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
164 vprod3x0 = vmlal_s8(vprod3x0, vb0x1, va3x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
174 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
175 vprod1x1 = vmlal_s8(vprod1x1, vb1x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
176 vprod2x1 = vmlal_s8(vprod2x1, vb1x1, va2x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
177 vprod3x1 = vmlal_s8(vprod3x1, vb1x1, va3x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
187 vprod0x2 = vmlal_s8(vprod0x2, vb2x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
188 vprod1x2 = vmlal_s8(vprod1x2, vb2x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
[all …]
D3x16c2-minmax-neon-mlal-padal-dup.c100 …vprod0x0123c0 = vmlal_s8(vprod0x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
101 …vprod1x0123c0 = vmlal_s8(vprod1x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
102 …vprod2x0123c0 = vmlal_s8(vprod2x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
110 …vprod0x4567c0 = vmlal_s8(vprod0x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
111 …vprod1x4567c0 = vmlal_s8(vprod1x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
112 …vprod2x4567c0 = vmlal_s8(vprod2x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
120 …vprod0x89ABc0 = vmlal_s8(vprod0x89ABc0, vb89ABc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
121 …vprod1x89ABc0 = vmlal_s8(vprod1x89ABc0, vb89ABc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
122 …vprod2x89ABc0 = vmlal_s8(vprod2x89ABc0, vb89ABc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
130 …vprod0xCDEFc0 = vmlal_s8(vprod0xCDEFc0, vbCDEFc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
[all …]
D4x8c8-minmax-neon-mlal-padal.c121 vprod0x0 = vmlal_s8(vprod0x0, vb0x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
122 vprod1x0 = vmlal_s8(vprod1x0, vb0x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
123 vprod2x0 = vmlal_s8(vprod2x0, vb0x1, va2x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
124 vprod3x0 = vmlal_s8(vprod3x0, vb0x1, va3x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
134 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
135 vprod1x1 = vmlal_s8(vprod1x1, vb1x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
136 vprod2x1 = vmlal_s8(vprod2x1, vb1x1, va2x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
137 vprod3x1 = vmlal_s8(vprod3x1, vb1x1, va3x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
147 vprod0x2 = vmlal_s8(vprod0x2, vb2x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
148 vprod1x2 = vmlal_s8(vprod1x2, vb2x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
[all …]
D2x16c8-minmax-neon-mlal-padal.c111 vprod0x0 = vmlal_s8(vprod0x0, vb0x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
112 vprod1x0 = vmlal_s8(vprod1x0, vb0x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
118 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
119 vprod1x1 = vmlal_s8(vprod1x1, vb1x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
125 vprod0x2 = vmlal_s8(vprod0x2, vb2x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
126 vprod1x2 = vmlal_s8(vprod1x2, vb2x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
132 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
133 vprod1x3 = vmlal_s8(vprod1x3, vb3x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
139 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
140 vprod1x4 = vmlal_s8(vprod1x4, vb4x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
[all …]
D2x16c2-minmax-neon-mlal-padal-dup.c87 …vprod0x0123c0 = vmlal_s8(vprod0x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
88 …vprod1x0123c0 = vmlal_s8(vprod1x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
94 …vprod0x4567c0 = vmlal_s8(vprod0x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
95 …vprod1x4567c0 = vmlal_s8(vprod1x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
101 …vprod0x89ABc0 = vmlal_s8(vprod0x89ABc0, vb89ABc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
102 …vprod1x89ABc0 = vmlal_s8(vprod1x89ABc0, vb89ABc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
108 …vprod0xCDEFc0 = vmlal_s8(vprod0xCDEFc0, vbCDEFc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
109 …vprod1xCDEFc0 = vmlal_s8(vprod1xCDEFc0, vbCDEFc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
115 …vprod0x0123c1 = vmlal_s8(vprod0x0123c1, vb0123c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
116 …vprod1x0123c1 = vmlal_s8(vprod1x0123c1, vb0123c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
[all …]
D4x8c2-minmax-neon-mlal-padal-dup.c97 …vprod0x0123c0 = vmlal_s8(vprod0x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
98 …vprod1x0123c0 = vmlal_s8(vprod1x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
99 …vprod2x0123c0 = vmlal_s8(vprod2x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
100 …vprod3x0123c0 = vmlal_s8(vprod3x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
110 …vprod0x4567c0 = vmlal_s8(vprod0x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
111 …vprod1x4567c0 = vmlal_s8(vprod1x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
112 …vprod2x4567c0 = vmlal_s8(vprod2x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
113 …vprod3x4567c0 = vmlal_s8(vprod3x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
123 …vprod0x0123c1 = vmlal_s8(vprod0x0123c1, vb0123c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
124 …vprod1x0123c1 = vmlal_s8(vprod1x0123c1, vb0123c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup()
[all …]

123