Lines Matching refs:va0
53 const int8x8_t va0 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup() local
72 …prod0x0123c0 = vmull_s8(vb0123c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 0))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
73 …prod0x0123c1 = vmull_s8(vb0123c1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 1))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
74 …prod0x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 2))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
75 …prod0x0123c3 = vmull_s8(vb0123c3, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 3))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
80 …prod0x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 0))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
81 …prod0x4567c1 = vmull_s8(vb4567c1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 1))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
82 …prod0x4567c2 = vmull_s8(vb4567c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 2))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
83 …prod0x4567c3 = vmull_s8(vb4567c3, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 3))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
88 …prod0x89ABc0 = vmull_s8(vb89ABc0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 0))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
89 …prod0x89ABc1 = vmull_s8(vb89ABc1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 1))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
90 …prod0x89ABc2 = vmull_s8(vb89ABc2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 2))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
91 …prod0x89ABc3 = vmull_s8(vb89ABc3, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 3))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
96 …prod0xCDEFc0 = vmull_s8(vbCDEFc0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 0))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
97 …prod0xCDEFc1 = vmull_s8(vbCDEFc1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 1))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
98 …prod0xCDEFc2 = vmull_s8(vbCDEFc2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 2))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
99 …prod0xCDEFc3 = vmull_s8(vbCDEFc3, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 3))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
109 const int8x8_t va0 = vld1_s8(a0); a0 = (const int8_t*) ((uintptr_t) a0 + k); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup() local
116 …prod0x0123c0 = vmull_s8(vb0123c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 0))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
118 …prod0x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 0))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
120 …prod0x89ABc0 = vmull_s8(vb89ABc0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 0))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
122 …prod0xCDEFc0 = vmull_s8(vbCDEFc0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 0))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
131 …prod0x0123c1 = vmull_s8(vb0123c1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 1))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
133 …prod0x4567c1 = vmull_s8(vb4567c1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 1))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
135 …prod0x89ABc1 = vmull_s8(vb89ABc1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 1))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
137 …prod0xCDEFc1 = vmull_s8(vbCDEFc1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 1))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
146 …prod0x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 2))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
148 …prod0x4567c2 = vmull_s8(vb4567c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 2))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
150 …prod0x89ABc2 = vmull_s8(vb89ABc2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 2))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
152 …prod0xCDEFc2 = vmull_s8(vbCDEFc2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 2))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()