Home
last modified time | relevance | path

Searched refs:va0x1 (Results 1 – 25 of 32) sorted by relevance

12

/external/XNNPACK/src/qs8-igemm/gen/
D1x16c8-minmax-neon-mlal-padal.c76 const int8x8_t va0x1 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
97 vprod0x0 = vmlal_s8(vprod0x0, vb0x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
101 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
105 vprod0x2 = vmlal_s8(vprod0x2, vb2x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
109 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
113 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
117 vprod0x5 = vmlal_s8(vprod0x5, vb5x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
121 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
125 vprod0x7 = vmlal_s8(vprod0x7, vb7x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
129 vprod0x8 = vmlal_s8(vprod0x8, vb8x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
[all …]
D1x16c2-minmax-neon-mlal-padal-dup.c64 const int8x8_t va0x1 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup() local
85 …l_s8(vprod0x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
89 …l_s8(vprod0x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
93 …l_s8(vprod0x89ABc0, vb89ABc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
97 …l_s8(vprod0xCDEFc0, vbCDEFc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
101 …l_s8(vprod0x0123c1, vb0123c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
105 …l_s8(vprod0x4567c1, vb4567c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
109 …l_s8(vprod0x89ABc1, vb89ABc1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
113 …l_s8(vprod0xCDEFc1, vbCDEFc1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
117 …l_s8(vprod0x0123c2, vb0123c2x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 2))); in xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
[all …]
D1x8c2-minmax-neon-mlal-padal-dup.c62 const int8x8_t va0x1 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup() local
75 …l_s8(vprod0x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup()
79 …l_s8(vprod0x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup()
83 …l_s8(vprod0x0123c1, vb0123c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup()
87 …l_s8(vprod0x4567c1, vb4567c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup()
91 …l_s8(vprod0x0123c2, vb0123c2x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 2))); in xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup()
95 …l_s8(vprod0x4567c2, vb4567c2x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 2))); in xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup()
99 …l_s8(vprod0x0123c3, vb0123c3x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 3))); in xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup()
103 …l_s8(vprod0x4567c3, vb4567c3x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 3))); in xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup()
D1x8c8-minmax-neon-mlal-padal.c68 const int8x8_t va0x1 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
81 vprod0x0 = vmlal_s8(vprod0x0, vb0x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
85 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
89 vprod0x2 = vmlal_s8(vprod0x2, vb2x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
93 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
97 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
101 vprod0x5 = vmlal_s8(vprod0x5, vb5x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
105 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
109 vprod0x7 = vmlal_s8(vprod0x7, vb7x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
D2x16c8-minmax-neon-mlal-padal.c100 const int8x8_t va0x1 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
124 vprod0x0 = vmlal_s8(vprod0x0, vb0x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
131 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
138 vprod0x2 = vmlal_s8(vprod0x2, vb2x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
145 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
152 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
159 vprod0x5 = vmlal_s8(vprod0x5, vb5x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
166 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
173 vprod0x7 = vmlal_s8(vprod0x7, vb7x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
180 vprod0x8 = vmlal_s8(vprod0x8, vb8x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
[all …]
D2x16c2-minmax-neon-mlal-padal-dup.c76 const int8x8_t va0x1 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup() local
100 …l_s8(vprod0x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
107 …l_s8(vprod0x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
114 …l_s8(vprod0x89ABc0, vb89ABc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
121 …l_s8(vprod0xCDEFc0, vbCDEFc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
128 …l_s8(vprod0x0123c1, vb0123c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
135 …l_s8(vprod0x4567c1, vb4567c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
142 …l_s8(vprod0x89ABc1, vb89ABc1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
149 …l_s8(vprod0xCDEFc1, vbCDEFc1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
156 …l_s8(vprod0x0123c2, vb0123c2x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 2))); in xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
[all …]
D2x8c8-minmax-neon-mlal-padal.c84 const int8x8_t va0x1 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
100 vprod0x0 = vmlal_s8(vprod0x0, vb0x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
107 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
114 vprod0x2 = vmlal_s8(vprod0x2, vb2x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
121 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
128 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
135 vprod0x5 = vmlal_s8(vprod0x5, vb5x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
142 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
149 vprod0x7 = vmlal_s8(vprod0x7, vb7x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
D2x8c2-minmax-neon-mlal-padal-dup.c72 const int8x8_t va0x1 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup() local
88 …l_s8(vprod0x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup()
95 …l_s8(vprod0x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup()
102 …l_s8(vprod0x0123c1, vb0123c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup()
109 …l_s8(vprod0x4567c1, vb4567c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup()
116 …l_s8(vprod0x0123c2, vb0123c2x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 2))); in xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup()
123 …l_s8(vprod0x4567c2, vb4567c2x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 2))); in xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup()
130 …l_s8(vprod0x0123c3, vb0123c3x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 3))); in xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup()
137 …l_s8(vprod0x4567c3, vb4567c3x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 3))); in xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup()
D3x16c2-minmax-neon-mlal-padal-dup.c88 const int8x8_t va0x1 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup() local
115 …l_s8(vprod0x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
125 …l_s8(vprod0x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
135 …l_s8(vprod0x89ABc0, vb89ABc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
145 …l_s8(vprod0xCDEFc0, vbCDEFc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
155 …l_s8(vprod0x0123c1, vb0123c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
165 …l_s8(vprod0x4567c1, vb4567c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
175 …l_s8(vprod0x89ABc1, vb89ABc1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
185 …l_s8(vprod0xCDEFc1, vbCDEFc1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
195 …l_s8(vprod0x0123c2, vb0123c2x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 2))); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
[all …]
D3x16c8-minmax-neon-mlal-padal.c124 const int8x8_t va0x1 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
151 vprod0x0 = vmlal_s8(vprod0x0, vb0x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
161 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
171 vprod0x2 = vmlal_s8(vprod0x2, vb2x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
181 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
191 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
201 vprod0x5 = vmlal_s8(vprod0x5, vb5x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
211 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
221 vprod0x7 = vmlal_s8(vprod0x7, vb7x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
231 vprod0x8 = vmlal_s8(vprod0x8, vb8x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
[all …]
D3x8c8-minmax-neon-mlal-padal.c100 const int8x8_t va0x1 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
119 vprod0x0 = vmlal_s8(vprod0x0, vb0x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
129 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
139 vprod0x2 = vmlal_s8(vprod0x2, vb2x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
149 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
159 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
169 vprod0x5 = vmlal_s8(vprod0x5, vb5x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
179 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
189 vprod0x7 = vmlal_s8(vprod0x7, vb7x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D3x8c2-minmax-neon-mlal-padal-dup.c82 const int8x8_t va0x1 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup() local
101 …l_s8(vprod0x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
111 …l_s8(vprod0x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
121 …l_s8(vprod0x0123c1, vb0123c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
131 …l_s8(vprod0x4567c1, vb4567c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
141 …l_s8(vprod0x0123c2, vb0123c2x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 2))); in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
151 …l_s8(vprod0x4567c2, vb4567c2x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 2))); in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
161 …l_s8(vprod0x0123c3, vb0123c3x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 3))); in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
171 …l_s8(vprod0x4567c3, vb4567c3x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 3))); in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
D4x16c2-minmax-neon-mlal-padal-dup.c100 const int8x8_t va0x1 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup() local
130 …l_s8(vprod0x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
143 …l_s8(vprod0x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
156 …l_s8(vprod0x89ABc0, vb89ABc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
169 …l_s8(vprod0xCDEFc0, vbCDEFc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
182 …l_s8(vprod0x0123c1, vb0123c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
195 …l_s8(vprod0x4567c1, vb4567c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
208 …l_s8(vprod0x89ABc1, vb89ABc1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
221 …l_s8(vprod0xCDEFc1, vbCDEFc1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
234 …l_s8(vprod0x0123c2, vb0123c2x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 2))); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
[all …]
/external/XNNPACK/src/qs8-gemm/gen/
D1x16c8-minmax-neon-mlal-padal.c65 const int8x8_t va0x1 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
86 vprod0x0 = vmlal_s8(vprod0x0, vb0x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
90 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
94 vprod0x2 = vmlal_s8(vprod0x2, vb2x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
98 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
102 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
106 vprod0x5 = vmlal_s8(vprod0x5, vb5x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
110 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
114 vprod0x7 = vmlal_s8(vprod0x7, vb7x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
118 vprod0x8 = vmlal_s8(vprod0x8, vb8x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
[all …]
D1x16c2-minmax-neon-mlal-padal-dup.c53 const int8x8_t va0x1 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup() local
74 …l_s8(vprod0x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
78 …l_s8(vprod0x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
82 …l_s8(vprod0x89ABc0, vb89ABc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
86 …l_s8(vprod0xCDEFc0, vbCDEFc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
90 …l_s8(vprod0x0123c1, vb0123c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
94 …l_s8(vprod0x4567c1, vb4567c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
98 …l_s8(vprod0x89ABc1, vb89ABc1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
102 …l_s8(vprod0xCDEFc1, vbCDEFc1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
106 …l_s8(vprod0x0123c2, vb0123c2x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 2))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
[all …]
D1x8c8-minmax-neon-mlal-padal.c57 const int8x8_t va0x1 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
70 vprod0x0 = vmlal_s8(vprod0x0, vb0x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
74 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
78 vprod0x2 = vmlal_s8(vprod0x2, vb2x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
82 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
86 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
90 vprod0x5 = vmlal_s8(vprod0x5, vb5x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
94 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
98 vprod0x7 = vmlal_s8(vprod0x7, vb7x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
D1x8c2-minmax-neon-mlal-padal-dup.c51 const int8x8_t va0x1 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup() local
64 …l_s8(vprod0x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup()
68 …l_s8(vprod0x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup()
72 …l_s8(vprod0x0123c1, vb0123c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup()
76 …l_s8(vprod0x4567c1, vb4567c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup()
80 …l_s8(vprod0x0123c2, vb0123c2x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 2))); in xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup()
84 …l_s8(vprod0x4567c2, vb4567c2x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 2))); in xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup()
88 …l_s8(vprod0x0123c3, vb0123c3x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 3))); in xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup()
92 …l_s8(vprod0x4567c3, vb4567c3x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 3))); in xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup()
D2x16c8-minmax-neon-mlal-padal.c87 const int8x8_t va0x1 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
111 vprod0x0 = vmlal_s8(vprod0x0, vb0x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
118 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
125 vprod0x2 = vmlal_s8(vprod0x2, vb2x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
132 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
139 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
146 vprod0x5 = vmlal_s8(vprod0x5, vb5x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
153 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
160 vprod0x7 = vmlal_s8(vprod0x7, vb7x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
167 vprod0x8 = vmlal_s8(vprod0x8, vb8x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
[all …]
D2x16c2-minmax-neon-mlal-padal-dup.c63 const int8x8_t va0x1 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup() local
87 …l_s8(vprod0x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
94 …l_s8(vprod0x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
101 …l_s8(vprod0x89ABc0, vb89ABc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
108 …l_s8(vprod0xCDEFc0, vbCDEFc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
115 …l_s8(vprod0x0123c1, vb0123c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
122 …l_s8(vprod0x4567c1, vb4567c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
129 …l_s8(vprod0x89ABc1, vb89ABc1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
136 …l_s8(vprod0xCDEFc1, vbCDEFc1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
143 …l_s8(vprod0x0123c2, vb0123c2x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 2))); in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
[all …]
D2x8c8-minmax-neon-mlal-padal.c71 const int8x8_t va0x1 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
87 vprod0x0 = vmlal_s8(vprod0x0, vb0x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
94 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
101 vprod0x2 = vmlal_s8(vprod0x2, vb2x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
108 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
115 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
122 vprod0x5 = vmlal_s8(vprod0x5, vb5x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
129 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
136 vprod0x7 = vmlal_s8(vprod0x7, vb7x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
D2x8c2-minmax-neon-mlal-padal-dup.c59 const int8x8_t va0x1 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup() local
75 …l_s8(vprod0x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup()
82 …l_s8(vprod0x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup()
89 …l_s8(vprod0x0123c1, vb0123c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup()
96 …l_s8(vprod0x4567c1, vb4567c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup()
103 …l_s8(vprod0x0123c2, vb0123c2x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 2))); in xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup()
110 …l_s8(vprod0x4567c2, vb4567c2x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 2))); in xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup()
117 …l_s8(vprod0x0123c3, vb0123c3x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 3))); in xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup()
124 …l_s8(vprod0x4567c3, vb4567c3x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 3))); in xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup()
D3x16c8-minmax-neon-mlal-padal.c109 const int8x8_t va0x1 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
136 vprod0x0 = vmlal_s8(vprod0x0, vb0x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
146 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
156 vprod0x2 = vmlal_s8(vprod0x2, vb2x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
166 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
176 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
186 vprod0x5 = vmlal_s8(vprod0x5, vb5x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
196 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
206 vprod0x7 = vmlal_s8(vprod0x7, vb7x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
216 vprod0x8 = vmlal_s8(vprod0x8, vb8x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
[all …]
D3x16c2-minmax-neon-mlal-padal-dup.c73 const int8x8_t va0x1 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup() local
100 …l_s8(vprod0x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
110 …l_s8(vprod0x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
120 …l_s8(vprod0x89ABc0, vb89ABc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
130 …l_s8(vprod0xCDEFc0, vbCDEFc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
140 …l_s8(vprod0x0123c1, vb0123c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
150 …l_s8(vprod0x4567c1, vb4567c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
160 …l_s8(vprod0x89ABc1, vb89ABc1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
170 …l_s8(vprod0xCDEFc1, vbCDEFc1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
180 …l_s8(vprod0x0123c2, vb0123c2x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 2))); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
[all …]
D3x8c8-minmax-neon-mlal-padal.c85 const int8x8_t va0x1 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
104 vprod0x0 = vmlal_s8(vprod0x0, vb0x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
114 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
124 vprod0x2 = vmlal_s8(vprod0x2, vb2x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
134 vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
144 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
154 vprod0x5 = vmlal_s8(vprod0x5, vb5x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
164 vprod0x6 = vmlal_s8(vprod0x6, vb6x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
174 vprod0x7 = vmlal_s8(vprod0x7, vb7x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D3x8c2-minmax-neon-mlal-padal-dup.c67 const int8x8_t va0x1 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup() local
86 …l_s8(vprod0x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
96 …l_s8(vprod0x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 0))); in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
106 …l_s8(vprod0x0123c1, vb0123c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
116 …l_s8(vprod0x4567c1, vb4567c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 1))); in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
126 …l_s8(vprod0x0123c2, vb0123c2x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 2))); in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
136 …l_s8(vprod0x4567c2, vb4567c2x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 2))); in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
146 …l_s8(vprod0x0123c3, vb0123c3x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 3))); in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
156 …l_s8(vprod0x4567c3, vb4567c3x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x1), 3))); in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()

12