Home
last modified time | relevance | path

Searched refs:va0x0 (Results 1 – 25 of 32) sorted by relevance

12

/external/XNNPACK/src/qs8-igemm/gen/
D1x16c8-minmax-neon-mlal-padal.c75 const int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
96 int16x8_t vprod0x0 = vmull_s8(vb0x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
100 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
104 int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
108 int16x8_t vprod0x3 = vmull_s8(vb3x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
112 int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
116 int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
120 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
124 int16x8_t vprod0x7 = vmull_s8(vb7x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
128 int16x8_t vprod0x8 = vmull_s8(vb8x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
[all …]
D1x16c2-minmax-neon-mlal-padal-dup.c63 const int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup() local
83 …0x0123c0 = vmull_s8(vb0123c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
87 …0x4567c0 = vmull_s8(vb4567c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
91 …0x89ABc0 = vmull_s8(vb89ABc0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
95 …0xCDEFc0 = vmull_s8(vbCDEFc0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
99 …0x0123c1 = vmull_s8(vb0123c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
103 …0x4567c1 = vmull_s8(vb4567c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
107 …0x89ABc1 = vmull_s8(vb89ABc1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
111 …0xCDEFc1 = vmull_s8(vbCDEFc1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
115 …0x0123c2 = vmull_s8(vb0123c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 2))); in xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
[all …]
D1x8c2-minmax-neon-mlal-padal-dup.c61 const int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup() local
73 …0x0123c0 = vmull_s8(vb0123c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup()
77 …0x4567c0 = vmull_s8(vb4567c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup()
81 …0x0123c1 = vmull_s8(vb0123c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup()
85 …0x4567c1 = vmull_s8(vb4567c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup()
89 …0x0123c2 = vmull_s8(vb0123c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 2))); in xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup()
93 …0x4567c2 = vmull_s8(vb4567c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 2))); in xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup()
97 …0x0123c3 = vmull_s8(vb0123c3x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 3))); in xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup()
101 …0x4567c3 = vmull_s8(vb4567c3x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 3))); in xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup()
D1x8c8-minmax-neon-mlal-padal.c67 const int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
80 int16x8_t vprod0x0 = vmull_s8(vb0x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
84 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
88 int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
92 int16x8_t vprod0x3 = vmull_s8(vb3x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
96 int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
100 int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
104 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
108 int16x8_t vprod0x7 = vmull_s8(vb7x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
D2x16c8-minmax-neon-mlal-padal.c99 const int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
122 int16x8_t vprod0x0 = vmull_s8(vb0x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
129 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
136 int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
143 int16x8_t vprod0x3 = vmull_s8(vb3x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
150 int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
157 int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
164 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
171 int16x8_t vprod0x7 = vmull_s8(vb7x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
178 int16x8_t vprod0x8 = vmull_s8(vb8x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
[all …]
D2x16c2-minmax-neon-mlal-padal-dup.c75 const int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup() local
97 …0x0123c0 = vmull_s8(vb0123c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
104 …0x4567c0 = vmull_s8(vb4567c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
111 …0x89ABc0 = vmull_s8(vb89ABc0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
118 …0xCDEFc0 = vmull_s8(vbCDEFc0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
125 …0x0123c1 = vmull_s8(vb0123c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
132 …0x4567c1 = vmull_s8(vb4567c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
139 …0x89ABc1 = vmull_s8(vb89ABc1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
146 …0xCDEFc1 = vmull_s8(vbCDEFc1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
153 …0x0123c2 = vmull_s8(vb0123c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 2))); in xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
[all …]
D2x8c8-minmax-neon-mlal-padal.c83 const int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
98 int16x8_t vprod0x0 = vmull_s8(vb0x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
105 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
112 int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
119 int16x8_t vprod0x3 = vmull_s8(vb3x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
126 int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
133 int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
140 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
147 int16x8_t vprod0x7 = vmull_s8(vb7x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
D2x8c2-minmax-neon-mlal-padal-dup.c71 const int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup() local
85 …0x0123c0 = vmull_s8(vb0123c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup()
92 …0x4567c0 = vmull_s8(vb4567c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup()
99 …0x0123c1 = vmull_s8(vb0123c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup()
106 …0x4567c1 = vmull_s8(vb4567c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup()
113 …0x0123c2 = vmull_s8(vb0123c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 2))); in xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup()
120 …0x4567c2 = vmull_s8(vb4567c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 2))); in xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup()
127 …0x0123c3 = vmull_s8(vb0123c3x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 3))); in xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup()
134 …0x4567c3 = vmull_s8(vb4567c3x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 3))); in xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup()
D3x16c2-minmax-neon-mlal-padal-dup.c87 const int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup() local
111 …0x0123c0 = vmull_s8(vb0123c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
121 …0x4567c0 = vmull_s8(vb4567c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
131 …0x89ABc0 = vmull_s8(vb89ABc0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
141 …0xCDEFc0 = vmull_s8(vbCDEFc0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
151 …0x0123c1 = vmull_s8(vb0123c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
161 …0x4567c1 = vmull_s8(vb4567c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
171 …0x89ABc1 = vmull_s8(vb89ABc1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
181 …0xCDEFc1 = vmull_s8(vbCDEFc1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
191 …0x0123c2 = vmull_s8(vb0123c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 2))); in xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
[all …]
D3x16c8-minmax-neon-mlal-padal.c123 const int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
148 int16x8_t vprod0x0 = vmull_s8(vb0x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
158 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
168 int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
178 int16x8_t vprod0x3 = vmull_s8(vb3x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
188 int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
198 int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
208 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
218 int16x8_t vprod0x7 = vmull_s8(vb7x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
228 int16x8_t vprod0x8 = vmull_s8(vb8x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
[all …]
D3x8c8-minmax-neon-mlal-padal.c99 const int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
116 int16x8_t vprod0x0 = vmull_s8(vb0x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
126 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
136 int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
146 int16x8_t vprod0x3 = vmull_s8(vb3x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
156 int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
166 int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
176 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
186 int16x8_t vprod0x7 = vmull_s8(vb7x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D3x8c2-minmax-neon-mlal-padal-dup.c81 const int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup() local
97 …0x0123c0 = vmull_s8(vb0123c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
107 …0x4567c0 = vmull_s8(vb4567c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
117 …0x0123c1 = vmull_s8(vb0123c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
127 …0x4567c1 = vmull_s8(vb4567c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
137 …0x0123c2 = vmull_s8(vb0123c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 2))); in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
147 …0x4567c2 = vmull_s8(vb4567c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 2))); in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
157 …0x0123c3 = vmull_s8(vb0123c3x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 3))); in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
167 …0x4567c3 = vmull_s8(vb4567c3x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 3))); in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
D4x16c2-minmax-neon-mlal-padal-dup.c99 const int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup() local
125 …0x0123c0 = vmull_s8(vb0123c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
138 …0x4567c0 = vmull_s8(vb4567c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
151 …0x89ABc0 = vmull_s8(vb89ABc0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
164 …0xCDEFc0 = vmull_s8(vbCDEFc0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
177 …0x0123c1 = vmull_s8(vb0123c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
190 …0x4567c1 = vmull_s8(vb4567c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
203 …0x89ABc1 = vmull_s8(vb89ABc1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
216 …0xCDEFc1 = vmull_s8(vbCDEFc1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
229 …0x0123c2 = vmull_s8(vb0123c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 2))); in xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
[all …]
/external/XNNPACK/src/qs8-gemm/gen/
D1x16c8-minmax-neon-mlal-padal.c64 const int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
85 int16x8_t vprod0x0 = vmull_s8(vb0x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
89 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
93 int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
97 int16x8_t vprod0x3 = vmull_s8(vb3x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
101 int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
105 int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
109 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
113 int16x8_t vprod0x7 = vmull_s8(vb7x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
117 int16x8_t vprod0x8 = vmull_s8(vb8x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
[all …]
D1x16c2-minmax-neon-mlal-padal-dup.c52 const int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup() local
72 …0x0123c0 = vmull_s8(vb0123c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
76 …0x4567c0 = vmull_s8(vb4567c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
80 …0x89ABc0 = vmull_s8(vb89ABc0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
84 …0xCDEFc0 = vmull_s8(vbCDEFc0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
88 …0x0123c1 = vmull_s8(vb0123c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
92 …0x4567c1 = vmull_s8(vb4567c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
96 …0x89ABc1 = vmull_s8(vb89ABc1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
100 …0xCDEFc1 = vmull_s8(vbCDEFc1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
104 …0x0123c2 = vmull_s8(vb0123c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 2))); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
[all …]
D1x8c8-minmax-neon-mlal-padal.c56 const int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
69 int16x8_t vprod0x0 = vmull_s8(vb0x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
73 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
77 int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
81 int16x8_t vprod0x3 = vmull_s8(vb3x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
85 int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
89 int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
93 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
97 int16x8_t vprod0x7 = vmull_s8(vb7x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
D1x8c2-minmax-neon-mlal-padal-dup.c50 const int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup() local
62 …0x0123c0 = vmull_s8(vb0123c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup()
66 …0x4567c0 = vmull_s8(vb4567c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup()
70 …0x0123c1 = vmull_s8(vb0123c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup()
74 …0x4567c1 = vmull_s8(vb4567c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup()
78 …0x0123c2 = vmull_s8(vb0123c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 2))); in xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup()
82 …0x4567c2 = vmull_s8(vb4567c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 2))); in xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup()
86 …0x0123c3 = vmull_s8(vb0123c3x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 3))); in xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup()
90 …0x4567c3 = vmull_s8(vb4567c3x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 3))); in xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup()
D2x16c8-minmax-neon-mlal-padal.c86 const int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
109 int16x8_t vprod0x0 = vmull_s8(vb0x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
116 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
123 int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
130 int16x8_t vprod0x3 = vmull_s8(vb3x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
137 int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
144 int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
151 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
158 int16x8_t vprod0x7 = vmull_s8(vb7x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
165 int16x8_t vprod0x8 = vmull_s8(vb8x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
[all …]
D2x16c2-minmax-neon-mlal-padal-dup.c62 const int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup() local
84 …0x0123c0 = vmull_s8(vb0123c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
91 …0x4567c0 = vmull_s8(vb4567c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
98 …0x89ABc0 = vmull_s8(vb89ABc0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
105 …0xCDEFc0 = vmull_s8(vbCDEFc0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
112 …0x0123c1 = vmull_s8(vb0123c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
119 …0x4567c1 = vmull_s8(vb4567c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
126 …0x89ABc1 = vmull_s8(vb89ABc1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
133 …0xCDEFc1 = vmull_s8(vbCDEFc1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
140 …0x0123c2 = vmull_s8(vb0123c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 2))); in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup()
[all …]
D2x8c8-minmax-neon-mlal-padal.c70 const int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
85 int16x8_t vprod0x0 = vmull_s8(vb0x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
92 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
99 int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
106 int16x8_t vprod0x3 = vmull_s8(vb3x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
113 int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
120 int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
127 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
134 int16x8_t vprod0x7 = vmull_s8(vb7x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
D2x8c2-minmax-neon-mlal-padal-dup.c58 const int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup() local
72 …0x0123c0 = vmull_s8(vb0123c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup()
79 …0x4567c0 = vmull_s8(vb4567c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup()
86 …0x0123c1 = vmull_s8(vb0123c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup()
93 …0x4567c1 = vmull_s8(vb4567c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup()
100 …0x0123c2 = vmull_s8(vb0123c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 2))); in xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup()
107 …0x4567c2 = vmull_s8(vb4567c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 2))); in xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup()
114 …0x0123c3 = vmull_s8(vb0123c3x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 3))); in xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup()
121 …0x4567c3 = vmull_s8(vb4567c3x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 3))); in xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup()
D3x16c8-minmax-neon-mlal-padal.c108 const int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
133 int16x8_t vprod0x0 = vmull_s8(vb0x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
143 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
153 int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
163 int16x8_t vprod0x3 = vmull_s8(vb3x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
173 int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
183 int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
193 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
203 int16x8_t vprod0x7 = vmull_s8(vb7x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
213 int16x8_t vprod0x8 = vmull_s8(vb8x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
[all …]
D3x16c2-minmax-neon-mlal-padal-dup.c72 const int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup() local
96 …0x0123c0 = vmull_s8(vb0123c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
106 …0x4567c0 = vmull_s8(vb4567c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
116 …0x89ABc0 = vmull_s8(vb89ABc0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
126 …0xCDEFc0 = vmull_s8(vbCDEFc0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
136 …0x0123c1 = vmull_s8(vb0123c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
146 …0x4567c1 = vmull_s8(vb4567c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
156 …0x89ABc1 = vmull_s8(vb89ABc1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
166 …0xCDEFc1 = vmull_s8(vbCDEFc1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
176 …0x0123c2 = vmull_s8(vb0123c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 2))); in xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup()
[all …]
D3x8c8-minmax-neon-mlal-padal.c84 const int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
101 int16x8_t vprod0x0 = vmull_s8(vb0x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
111 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
121 int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
131 int16x8_t vprod0x3 = vmull_s8(vb3x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
141 int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
151 int16x8_t vprod0x5 = vmull_s8(vb5x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
161 int16x8_t vprod0x6 = vmull_s8(vb6x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
171 int16x8_t vprod0x7 = vmull_s8(vb7x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D3x8c2-minmax-neon-mlal-padal-dup.c66 const int8x8_t va0x0 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup() local
82 …0x0123c0 = vmull_s8(vb0123c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
92 …0x4567c0 = vmull_s8(vb4567c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 0))); in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
102 …0x0123c1 = vmull_s8(vb0123c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
112 …0x4567c1 = vmull_s8(vb4567c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 1))); in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
122 …0x0123c2 = vmull_s8(vb0123c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 2))); in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
132 …0x4567c2 = vmull_s8(vb4567c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 2))); in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
142 …0x0123c3 = vmull_s8(vb0123c3x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 3))); in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()
152 …0x4567c3 = vmull_s8(vb4567c3x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0x0), 3))); in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup()

12