Lines Matching full:val
175 c0.val[0] = vmlal_lane_u16(c0.val[0], b00_u16.val[0], a00_u16.val[0], 0); in vector_matrix_multiply_u8()
176 c0.val[1] = vmlal_lane_u16(c0.val[1], b00_u16.val[1], a00_u16.val[0], 0); in vector_matrix_multiply_u8()
177 c0.val[2] = vmlal_lane_u16(c0.val[2], b00_u16.val[2], a00_u16.val[0], 0); in vector_matrix_multiply_u8()
178 c0.val[3] = vmlal_lane_u16(c0.val[3], b00_u16.val[3], a00_u16.val[0], 0); in vector_matrix_multiply_u8()
181 c0.val[0] = vmlal_lane_u16(c0.val[0], b10_u16.val[0], a00_u16.val[0], 1); in vector_matrix_multiply_u8()
182 c0.val[1] = vmlal_lane_u16(c0.val[1], b10_u16.val[1], a00_u16.val[0], 1); in vector_matrix_multiply_u8()
183 c0.val[2] = vmlal_lane_u16(c0.val[2], b10_u16.val[2], a00_u16.val[0], 1); in vector_matrix_multiply_u8()
184 c0.val[3] = vmlal_lane_u16(c0.val[3], b10_u16.val[3], a00_u16.val[0], 1); in vector_matrix_multiply_u8()
187 c0.val[0] = vmlal_lane_u16(c0.val[0], b20_u16.val[0], a00_u16.val[0], 2); in vector_matrix_multiply_u8()
188 c0.val[1] = vmlal_lane_u16(c0.val[1], b20_u16.val[1], a00_u16.val[0], 2); in vector_matrix_multiply_u8()
189 c0.val[2] = vmlal_lane_u16(c0.val[2], b20_u16.val[2], a00_u16.val[0], 2); in vector_matrix_multiply_u8()
190 c0.val[3] = vmlal_lane_u16(c0.val[3], b20_u16.val[3], a00_u16.val[0], 2); in vector_matrix_multiply_u8()
193 c0.val[0] = vmlal_lane_u16(c0.val[0], b30_u16.val[0], a00_u16.val[0], 3); in vector_matrix_multiply_u8()
194 c0.val[1] = vmlal_lane_u16(c0.val[1], b30_u16.val[1], a00_u16.val[0], 3); in vector_matrix_multiply_u8()
195 c0.val[2] = vmlal_lane_u16(c0.val[2], b30_u16.val[2], a00_u16.val[0], 3); in vector_matrix_multiply_u8()
196 c0.val[3] = vmlal_lane_u16(c0.val[3], b30_u16.val[3], a00_u16.val[0], 3); in vector_matrix_multiply_u8()
199 c0.val[0] = vmlal_lane_u16(c0.val[0], b40_u16.val[0], a00_u16.val[1], 0); in vector_matrix_multiply_u8()
200 c0.val[1] = vmlal_lane_u16(c0.val[1], b40_u16.val[1], a00_u16.val[1], 0); in vector_matrix_multiply_u8()
201 c0.val[2] = vmlal_lane_u16(c0.val[2], b40_u16.val[2], a00_u16.val[1], 0); in vector_matrix_multiply_u8()
202 c0.val[3] = vmlal_lane_u16(c0.val[3], b40_u16.val[3], a00_u16.val[1], 0); in vector_matrix_multiply_u8()
205 c0.val[0] = vmlal_lane_u16(c0.val[0], b50_u16.val[0], a00_u16.val[1], 1); in vector_matrix_multiply_u8()
206 c0.val[1] = vmlal_lane_u16(c0.val[1], b50_u16.val[1], a00_u16.val[1], 1); in vector_matrix_multiply_u8()
207 c0.val[2] = vmlal_lane_u16(c0.val[2], b50_u16.val[2], a00_u16.val[1], 1); in vector_matrix_multiply_u8()
208 c0.val[3] = vmlal_lane_u16(c0.val[3], b50_u16.val[3], a00_u16.val[1], 1); in vector_matrix_multiply_u8()
211 c0.val[0] = vmlal_lane_u16(c0.val[0], b60_u16.val[0], a00_u16.val[1], 2); in vector_matrix_multiply_u8()
212 c0.val[1] = vmlal_lane_u16(c0.val[1], b60_u16.val[1], a00_u16.val[1], 2); in vector_matrix_multiply_u8()
213 c0.val[2] = vmlal_lane_u16(c0.val[2], b60_u16.val[2], a00_u16.val[1], 2); in vector_matrix_multiply_u8()
214 c0.val[3] = vmlal_lane_u16(c0.val[3], b60_u16.val[3], a00_u16.val[1], 2); in vector_matrix_multiply_u8()
217 c0.val[0] = vmlal_lane_u16(c0.val[0], b70_u16.val[0], a00_u16.val[1], 3); in vector_matrix_multiply_u8()
218 c0.val[1] = vmlal_lane_u16(c0.val[1], b70_u16.val[1], a00_u16.val[1], 3); in vector_matrix_multiply_u8()
219 c0.val[2] = vmlal_lane_u16(c0.val[2], b70_u16.val[2], a00_u16.val[1], 3); in vector_matrix_multiply_u8()
220 c0.val[3] = vmlal_lane_u16(c0.val[3], b70_u16.val[3], a00_u16.val[1], 3); in vector_matrix_multiply_u8()
246 c0.val[0] = vmlal_lane_u16(c0.val[0], b00_u16.val[0], a00_u16, 0); in vector_matrix_multiply_u8()
247 c0.val[1] = vmlal_lane_u16(c0.val[1], b00_u16.val[1], a00_u16, 0); in vector_matrix_multiply_u8()
248 c0.val[2] = vmlal_lane_u16(c0.val[2], b00_u16.val[2], a00_u16, 0); in vector_matrix_multiply_u8()
249 c0.val[3] = vmlal_lane_u16(c0.val[3], b00_u16.val[3], a00_u16, 0); in vector_matrix_multiply_u8()
258 vst1q_s32(vec_out + 0, vreinterpretq_s32_u32(c0.val[0])); in vector_matrix_multiply_u8()
259 vst1q_s32(vec_out + 4, vreinterpretq_s32_u32(c0.val[1])); in vector_matrix_multiply_u8()
260 vst1q_s32(vec_out + 8, vreinterpretq_s32_u32(c0.val[2])); in vector_matrix_multiply_u8()
261 vst1q_s32(vec_out + 12, vreinterpretq_s32_u32(c0.val[3])); in vector_matrix_multiply_u8()
270 *(vec_out + k * 4 + j) = c0.val[k][j]; in vector_matrix_multiply_u8()
405 c0.val[0] = vmlal_lane_s16(c0.val[0], b00_s16.val[0], a00_s16.val[0], 0); in vector_matrix_multiply_s8()
406 c0.val[1] = vmlal_lane_s16(c0.val[1], b00_s16.val[1], a00_s16.val[0], 0); in vector_matrix_multiply_s8()
407 c0.val[2] = vmlal_lane_s16(c0.val[2], b00_s16.val[2], a00_s16.val[0], 0); in vector_matrix_multiply_s8()
408 c0.val[3] = vmlal_lane_s16(c0.val[3], b00_s16.val[3], a00_s16.val[0], 0); in vector_matrix_multiply_s8()
411 c0.val[0] = vmlal_lane_s16(c0.val[0], b10_s16.val[0], a00_s16.val[0], 1); in vector_matrix_multiply_s8()
412 c0.val[1] = vmlal_lane_s16(c0.val[1], b10_s16.val[1], a00_s16.val[0], 1); in vector_matrix_multiply_s8()
413 c0.val[2] = vmlal_lane_s16(c0.val[2], b10_s16.val[2], a00_s16.val[0], 1); in vector_matrix_multiply_s8()
414 c0.val[3] = vmlal_lane_s16(c0.val[3], b10_s16.val[3], a00_s16.val[0], 1); in vector_matrix_multiply_s8()
417 c0.val[0] = vmlal_lane_s16(c0.val[0], b20_s16.val[0], a00_s16.val[0], 2); in vector_matrix_multiply_s8()
418 c0.val[1] = vmlal_lane_s16(c0.val[1], b20_s16.val[1], a00_s16.val[0], 2); in vector_matrix_multiply_s8()
419 c0.val[2] = vmlal_lane_s16(c0.val[2], b20_s16.val[2], a00_s16.val[0], 2); in vector_matrix_multiply_s8()
420 c0.val[3] = vmlal_lane_s16(c0.val[3], b20_s16.val[3], a00_s16.val[0], 2); in vector_matrix_multiply_s8()
423 c0.val[0] = vmlal_lane_s16(c0.val[0], b30_s16.val[0], a00_s16.val[0], 3); in vector_matrix_multiply_s8()
424 c0.val[1] = vmlal_lane_s16(c0.val[1], b30_s16.val[1], a00_s16.val[0], 3); in vector_matrix_multiply_s8()
425 c0.val[2] = vmlal_lane_s16(c0.val[2], b30_s16.val[2], a00_s16.val[0], 3); in vector_matrix_multiply_s8()
426 c0.val[3] = vmlal_lane_s16(c0.val[3], b30_s16.val[3], a00_s16.val[0], 3); in vector_matrix_multiply_s8()
429 c0.val[0] = vmlal_lane_s16(c0.val[0], b40_s16.val[0], a00_s16.val[1], 0); in vector_matrix_multiply_s8()
430 c0.val[1] = vmlal_lane_s16(c0.val[1], b40_s16.val[1], a00_s16.val[1], 0); in vector_matrix_multiply_s8()
431 c0.val[2] = vmlal_lane_s16(c0.val[2], b40_s16.val[2], a00_s16.val[1], 0); in vector_matrix_multiply_s8()
432 c0.val[3] = vmlal_lane_s16(c0.val[3], b40_s16.val[3], a00_s16.val[1], 0); in vector_matrix_multiply_s8()
435 c0.val[0] = vmlal_lane_s16(c0.val[0], b50_s16.val[0], a00_s16.val[1], 1); in vector_matrix_multiply_s8()
436 c0.val[1] = vmlal_lane_s16(c0.val[1], b50_s16.val[1], a00_s16.val[1], 1); in vector_matrix_multiply_s8()
437 c0.val[2] = vmlal_lane_s16(c0.val[2], b50_s16.val[2], a00_s16.val[1], 1); in vector_matrix_multiply_s8()
438 c0.val[3] = vmlal_lane_s16(c0.val[3], b50_s16.val[3], a00_s16.val[1], 1); in vector_matrix_multiply_s8()
441 c0.val[0] = vmlal_lane_s16(c0.val[0], b60_s16.val[0], a00_s16.val[1], 2); in vector_matrix_multiply_s8()
442 c0.val[1] = vmlal_lane_s16(c0.val[1], b60_s16.val[1], a00_s16.val[1], 2); in vector_matrix_multiply_s8()
443 c0.val[2] = vmlal_lane_s16(c0.val[2], b60_s16.val[2], a00_s16.val[1], 2); in vector_matrix_multiply_s8()
444 c0.val[3] = vmlal_lane_s16(c0.val[3], b60_s16.val[3], a00_s16.val[1], 2); in vector_matrix_multiply_s8()
447 c0.val[0] = vmlal_lane_s16(c0.val[0], b70_s16.val[0], a00_s16.val[1], 3); in vector_matrix_multiply_s8()
448 c0.val[1] = vmlal_lane_s16(c0.val[1], b70_s16.val[1], a00_s16.val[1], 3); in vector_matrix_multiply_s8()
449 c0.val[2] = vmlal_lane_s16(c0.val[2], b70_s16.val[2], a00_s16.val[1], 3); in vector_matrix_multiply_s8()
450 c0.val[3] = vmlal_lane_s16(c0.val[3], b70_s16.val[3], a00_s16.val[1], 3); in vector_matrix_multiply_s8()
476 c0.val[0] = vmlal_lane_s16(c0.val[0], b00_s16.val[0], a00_s16, 0); in vector_matrix_multiply_s8()
477 c0.val[1] = vmlal_lane_s16(c0.val[1], b00_s16.val[1], a00_s16, 0); in vector_matrix_multiply_s8()
478 c0.val[2] = vmlal_lane_s16(c0.val[2], b00_s16.val[2], a00_s16, 0); in vector_matrix_multiply_s8()
479 c0.val[3] = vmlal_lane_s16(c0.val[3], b00_s16.val[3], a00_s16, 0); in vector_matrix_multiply_s8()
488 vst1q_s32(vec_out + 0, c0.val[0]); in vector_matrix_multiply_s8()
489 vst1q_s32(vec_out + 4, c0.val[1]); in vector_matrix_multiply_s8()
490 vst1q_s32(vec_out + 8, c0.val[2]); in vector_matrix_multiply_s8()
491 vst1q_s32(vec_out + 12, c0.val[3]); in vector_matrix_multiply_s8()
500 *(vec_out + k * 4 + j) = c0.val[k][j]; in vector_matrix_multiply_s8()
583 c0.val[0] = vmlal_lane_u16(c0.val[0], b00_u16.val[0], a00_u16, 0); in matrix_multiply_u8()
584 c0.val[1] = vmlal_lane_u16(c0.val[1], b00_u16.val[1], a00_u16, 0); in matrix_multiply_u8()
585 c0.val[2] = vmlal_lane_u16(c0.val[2], b00_u16.val[2], a00_u16, 0); in matrix_multiply_u8()
586 c0.val[3] = vmlal_lane_u16(c0.val[3], b00_u16.val[3], a00_u16, 0); in matrix_multiply_u8()
589 c1.val[0] = vmlal_lane_u16(c1.val[0], b00_u16.val[0], a00_u16, 1); in matrix_multiply_u8()
590 c1.val[1] = vmlal_lane_u16(c1.val[1], b00_u16.val[1], a00_u16, 1); in matrix_multiply_u8()
591 c1.val[2] = vmlal_lane_u16(c1.val[2], b00_u16.val[2], a00_u16, 1); in matrix_multiply_u8()
592 c1.val[3] = vmlal_lane_u16(c1.val[3], b00_u16.val[3], a00_u16, 1); in matrix_multiply_u8()
595 c2.val[0] = vmlal_lane_u16(c2.val[0], b00_u16.val[0], a00_u16, 2); in matrix_multiply_u8()
596 c2.val[1] = vmlal_lane_u16(c2.val[1], b00_u16.val[1], a00_u16, 2); in matrix_multiply_u8()
597 c2.val[2] = vmlal_lane_u16(c2.val[2], b00_u16.val[2], a00_u16, 2); in matrix_multiply_u8()
598 c2.val[3] = vmlal_lane_u16(c2.val[3], b00_u16.val[3], a00_u16, 2); in matrix_multiply_u8()
601 c3.val[0] = vmlal_lane_u16(c3.val[0], b00_u16.val[0], a00_u16, 3); in matrix_multiply_u8()
602 c3.val[1] = vmlal_lane_u16(c3.val[1], b00_u16.val[1], a00_u16, 3); in matrix_multiply_u8()
603 c3.val[2] = vmlal_lane_u16(c3.val[2], b00_u16.val[2], a00_u16, 3); in matrix_multiply_u8()
604 c3.val[3] = vmlal_lane_u16(c3.val[3], b00_u16.val[3], a00_u16, 3); in matrix_multiply_u8()
611 vst1q_s32(mtx_out + 0 * out_stride + 0, vreinterpretq_s32_u32(c0.val[0])); in matrix_multiply_u8()
612 vst1q_s32(mtx_out + 0 * out_stride + 4, vreinterpretq_s32_u32(c0.val[1])); in matrix_multiply_u8()
613 vst1q_s32(mtx_out + 0 * out_stride + 8, vreinterpretq_s32_u32(c0.val[2])); in matrix_multiply_u8()
614 vst1q_s32(mtx_out + 0 * out_stride + 12, vreinterpretq_s32_u32(c0.val[3])); in matrix_multiply_u8()
617 vst1q_s32(mtx_out + 1 * out_stride + 0, vreinterpretq_s32_u32(c1.val[0])); in matrix_multiply_u8()
618 vst1q_s32(mtx_out + 1 * out_stride + 4, vreinterpretq_s32_u32(c1.val[1])); in matrix_multiply_u8()
619 vst1q_s32(mtx_out + 1 * out_stride + 8, vreinterpretq_s32_u32(c1.val[2])); in matrix_multiply_u8()
620 vst1q_s32(mtx_out + 1 * out_stride + 12, vreinterpretq_s32_u32(c1.val[3])); in matrix_multiply_u8()
623 vst1q_s32(mtx_out + 2 * out_stride + 0, vreinterpretq_s32_u32(c2.val[0])); in matrix_multiply_u8()
624 vst1q_s32(mtx_out + 2 * out_stride + 4, vreinterpretq_s32_u32(c2.val[1])); in matrix_multiply_u8()
625 vst1q_s32(mtx_out + 2 * out_stride + 8, vreinterpretq_s32_u32(c2.val[2])); in matrix_multiply_u8()
626 vst1q_s32(mtx_out + 2 * out_stride + 12, vreinterpretq_s32_u32(c2.val[3])); in matrix_multiply_u8()
629 vst1q_s32(mtx_out + 3 * out_stride + 0, vreinterpretq_s32_u32(c3.val[0])); in matrix_multiply_u8()
630 vst1q_s32(mtx_out + 3 * out_stride + 4, vreinterpretq_s32_u32(c3.val[1])); in matrix_multiply_u8()
631 vst1q_s32(mtx_out + 3 * out_stride + 8, vreinterpretq_s32_u32(c3.val[2])); in matrix_multiply_u8()
632 vst1q_s32(mtx_out + 3 * out_stride + 12, vreinterpretq_s32_u32(c3.val[3])); in matrix_multiply_u8()
645 *(mtx_out + k * 4 + j) = c0.val[k][j]; in matrix_multiply_u8()
655 *(mtx_out + out_stride + k * 4 + j) = c1.val[k][j]; in matrix_multiply_u8()
665 *(mtx_out + out_stride * 2 + k * 4 + j) = c2.val[k][j]; in matrix_multiply_u8()
675 *(mtx_out + out_stride * 3 + k * 4 + j) = c3.val[k][j]; in matrix_multiply_u8()
764 c0.val[0] = vmlal_lane_s16(c0.val[0], b00_s16.val[0], a00_s16, 0); in matrix_multiply_s8()
765 c0.val[1] = vmlal_lane_s16(c0.val[1], b00_s16.val[1], a00_s16, 0); in matrix_multiply_s8()
766 c0.val[2] = vmlal_lane_s16(c0.val[2], b00_s16.val[2], a00_s16, 0); in matrix_multiply_s8()
767 c0.val[3] = vmlal_lane_s16(c0.val[3], b00_s16.val[3], a00_s16, 0); in matrix_multiply_s8()
770 c1.val[0] = vmlal_lane_s16(c1.val[0], b00_s16.val[0], a00_s16, 1); in matrix_multiply_s8()
771 c1.val[1] = vmlal_lane_s16(c1.val[1], b00_s16.val[1], a00_s16, 1); in matrix_multiply_s8()
772 c1.val[2] = vmlal_lane_s16(c1.val[2], b00_s16.val[2], a00_s16, 1); in matrix_multiply_s8()
773 c1.val[3] = vmlal_lane_s16(c1.val[3], b00_s16.val[3], a00_s16, 1); in matrix_multiply_s8()
776 c2.val[0] = vmlal_lane_s16(c2.val[0], b00_s16.val[0], a00_s16, 2); in matrix_multiply_s8()
777 c2.val[1] = vmlal_lane_s16(c2.val[1], b00_s16.val[1], a00_s16, 2); in matrix_multiply_s8()
778 c2.val[2] = vmlal_lane_s16(c2.val[2], b00_s16.val[2], a00_s16, 2); in matrix_multiply_s8()
779 c2.val[3] = vmlal_lane_s16(c2.val[3], b00_s16.val[3], a00_s16, 2); in matrix_multiply_s8()
782 c3.val[0] = vmlal_lane_s16(c3.val[0], b00_s16.val[0], a00_s16, 3); in matrix_multiply_s8()
783 c3.val[1] = vmlal_lane_s16(c3.val[1], b00_s16.val[1], a00_s16, 3); in matrix_multiply_s8()
784 c3.val[2] = vmlal_lane_s16(c3.val[2], b00_s16.val[2], a00_s16, 3); in matrix_multiply_s8()
785 c3.val[3] = vmlal_lane_s16(c3.val[3], b00_s16.val[3], a00_s16, 3); in matrix_multiply_s8()
790 vst1q_s32(mtx_out + 0 * out_stride + 0, c0.val[0]); in matrix_multiply_s8()
791 vst1q_s32(mtx_out + 0 * out_stride + 4, c0.val[1]); in matrix_multiply_s8()
792 vst1q_s32(mtx_out + 0 * out_stride + 8, c0.val[2]); in matrix_multiply_s8()
793 vst1q_s32(mtx_out + 0 * out_stride + 12, c0.val[3]); in matrix_multiply_s8()
796 vst1q_s32(mtx_out + 1 * out_stride + 0, c1.val[0]); in matrix_multiply_s8()
797 vst1q_s32(mtx_out + 1 * out_stride + 4, c1.val[1]); in matrix_multiply_s8()
798 vst1q_s32(mtx_out + 1 * out_stride + 8, c1.val[2]); in matrix_multiply_s8()
799 vst1q_s32(mtx_out + 1 * out_stride + 12, c1.val[3]); in matrix_multiply_s8()
802 vst1q_s32(mtx_out + 2 * out_stride + 0, c2.val[0]); in matrix_multiply_s8()
803 vst1q_s32(mtx_out + 2 * out_stride + 4, c2.val[1]); in matrix_multiply_s8()
804 vst1q_s32(mtx_out + 2 * out_stride + 8, c2.val[2]); in matrix_multiply_s8()
805 vst1q_s32(mtx_out + 2 * out_stride + 12, c2.val[3]); in matrix_multiply_s8()
808 vst1q_s32(mtx_out + 3 * out_stride + 0, c3.val[0]); in matrix_multiply_s8()
809 vst1q_s32(mtx_out + 3 * out_stride + 4, c3.val[1]); in matrix_multiply_s8()
810 vst1q_s32(mtx_out + 3 * out_stride + 8, c3.val[2]); in matrix_multiply_s8()
811 vst1q_s32(mtx_out + 3 * out_stride + 12, c3.val[3]); in matrix_multiply_s8()
824 *(mtx_out + k * 4 + j) = c0.val[k][j]; in matrix_multiply_s8()
834 *(mtx_out + out_stride + k * 4 + j) = c1.val[k][j]; in matrix_multiply_s8()
844 *(mtx_out + out_stride * 2 + k * 4 + j) = c2.val[k][j]; in matrix_multiply_s8()
854 *(mtx_out + out_stride * 3 + k * 4 + j) = c3.val[k][j]; in matrix_multiply_s8()