Home
last modified time | relevance | path

Searched refs:vb0123x4567 (Results 1 – 16 of 16) sorted by relevance

/external/XNNPACK/src/qs8-gemm/gen/
D8x8c4-minmax-neondot.c121 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() local
127 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
129 vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
131 vacc2x4567 = vdotq_lane_s32(vacc2x4567, vb0123x4567, va2x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
133 vacc3x4567 = vdotq_lane_s32(vacc3x4567, vb0123x4567, va3x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
135 vacc4x4567 = vdotq_lane_s32(vacc4x4567, vb0123x4567, va4x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
137 vacc5x4567 = vdotq_lane_s32(vacc5x4567, vb0123x4567, va5x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
139 vacc6x4567 = vdotq_lane_s32(vacc6x4567, vb0123x4567, va6x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
141 vacc7x4567 = vdotq_lane_s32(vacc7x4567, vb0123x4567, va7x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
175 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() local
[all …]
D6x8c4-minmax-neondot.c103 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() local
109 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
111 vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
113 vacc2x4567 = vdotq_lane_s32(vacc2x4567, vb0123x4567, va2x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
115 vacc3x4567 = vdotq_lane_s32(vacc3x4567, vb0123x4567, va3x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
117 vacc4x4567 = vdotq_lane_s32(vacc4x4567, vb0123x4567, va4x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
119 vacc5x4567 = vdotq_lane_s32(vacc5x4567, vb0123x4567, va5x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
147 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() local
151 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
153 vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
[all …]
D4x8c4-minmax-neondot.c85 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot() local
91 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot()
93 vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot()
95 vacc2x4567 = vdotq_lane_s32(vacc2x4567, vb0123x4567, va2x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot()
97 vacc3x4567 = vdotq_lane_s32(vacc3x4567, vb0123x4567, va3x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot()
119 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot() local
123 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot()
125 vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot()
127 vacc2x4567 = vdotq_lane_s32(vacc2x4567, vb0123x4567, va2x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot()
129 vacc3x4567 = vdotq_lane_s32(vacc3x4567, vb0123x4567, va3x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot()
D1x8c4-minmax-neondot.c58 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot() local
64 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot()
77 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot() local
81 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot()
D8x16c4-minmax-neondot.c137 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() local
147 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
151 vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
155 vacc2x4567 = vdotq_lane_s32(vacc2x4567, vb0123x4567, va2x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
159 vacc3x4567 = vdotq_lane_s32(vacc3x4567, vb0123x4567, va3x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
163 vacc4x4567 = vdotq_lane_s32(vacc4x4567, vb0123x4567, va4x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
167 vacc5x4567 = vdotq_lane_s32(vacc5x4567, vb0123x4567, va5x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
171 vacc6x4567 = vdotq_lane_s32(vacc6x4567, vb0123x4567, va6x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
175 vacc7x4567 = vdotq_lane_s32(vacc7x4567, vb0123x4567, va7x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
227 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() local
[all …]
D6x16c4-minmax-neondot.c115 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() local
125 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
129 vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
133 vacc2x4567 = vdotq_lane_s32(vacc2x4567, vb0123x4567, va2x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
137 vacc3x4567 = vdotq_lane_s32(vacc3x4567, vb0123x4567, va3x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
141 vacc4x4567 = vdotq_lane_s32(vacc4x4567, vb0123x4567, va4x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
145 vacc5x4567 = vdotq_lane_s32(vacc5x4567, vb0123x4567, va5x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
187 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() local
193 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
197 vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
[all …]
D4x16c4-minmax-neondot.c93 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() local
103 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
107 vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
111 vacc2x4567 = vdotq_lane_s32(vacc2x4567, vb0123x4567, va2x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
115 vacc3x4567 = vdotq_lane_s32(vacc3x4567, vb0123x4567, va3x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
147 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() local
153 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
157 vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
161 vacc2x4567 = vdotq_lane_s32(vacc2x4567, vb0123x4567, va2x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
165 vacc3x4567 = vdotq_lane_s32(vacc3x4567, vb0123x4567, va3x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
D1x16c4-minmax-neondot.c60 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot() local
70 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot()
87 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot() local
93 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot()
/external/XNNPACK/src/qs8-igemm/gen/
D8x8c4-minmax-neondot.c144 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() local
150 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
152 vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
154 vacc2x4567 = vdotq_lane_s32(vacc2x4567, vb0123x4567, va2x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
156 vacc3x4567 = vdotq_lane_s32(vacc3x4567, vb0123x4567, va3x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
158 vacc4x4567 = vdotq_lane_s32(vacc4x4567, vb0123x4567, va4x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
160 vacc5x4567 = vdotq_lane_s32(vacc5x4567, vb0123x4567, va5x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
162 vacc6x4567 = vdotq_lane_s32(vacc6x4567, vb0123x4567, va6x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
164 vacc7x4567 = vdotq_lane_s32(vacc7x4567, vb0123x4567, va7x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
198 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() local
[all …]
D6x8c4-minmax-neondot.c122 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() local
128 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
130 vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
132 vacc2x4567 = vdotq_lane_s32(vacc2x4567, vb0123x4567, va2x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
134 vacc3x4567 = vdotq_lane_s32(vacc3x4567, vb0123x4567, va3x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
136 vacc4x4567 = vdotq_lane_s32(vacc4x4567, vb0123x4567, va4x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
138 vacc5x4567 = vdotq_lane_s32(vacc5x4567, vb0123x4567, va5x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
166 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() local
170 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
172 vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
[all …]
D4x8c4-minmax-neondot.c100 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot() local
106 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot()
108 vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot()
110 vacc2x4567 = vdotq_lane_s32(vacc2x4567, vb0123x4567, va2x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot()
112 vacc3x4567 = vdotq_lane_s32(vacc3x4567, vb0123x4567, va3x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot()
134 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot() local
138 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot()
140 vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot()
142 vacc2x4567 = vdotq_lane_s32(vacc2x4567, vb0123x4567, va2x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot()
144 vacc3x4567 = vdotq_lane_s32(vacc3x4567, vb0123x4567, va3x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot()
D8x16c4-minmax-neondot.c160 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() local
170 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
174 vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
178 vacc2x4567 = vdotq_lane_s32(vacc2x4567, vb0123x4567, va2x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
182 vacc3x4567 = vdotq_lane_s32(vacc3x4567, vb0123x4567, va3x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
186 vacc4x4567 = vdotq_lane_s32(vacc4x4567, vb0123x4567, va4x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
190 vacc5x4567 = vdotq_lane_s32(vacc5x4567, vb0123x4567, va5x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
194 vacc6x4567 = vdotq_lane_s32(vacc6x4567, vb0123x4567, va6x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
198 vacc7x4567 = vdotq_lane_s32(vacc7x4567, vb0123x4567, va7x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
250 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() local
[all …]
D1x8c4-minmax-neondot.c67 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot() local
73 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot()
86 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot() local
90 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot()
D6x16c4-minmax-neondot.c134 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() local
144 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
148 vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
152 vacc2x4567 = vdotq_lane_s32(vacc2x4567, vb0123x4567, va2x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
156 vacc3x4567 = vdotq_lane_s32(vacc3x4567, vb0123x4567, va3x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
160 vacc4x4567 = vdotq_lane_s32(vacc4x4567, vb0123x4567, va4x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
164 vacc5x4567 = vdotq_lane_s32(vacc5x4567, vb0123x4567, va5x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
206 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() local
212 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
216 vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
[all …]
D4x16c4-minmax-neondot.c108 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() local
118 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
122 vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
126 vacc2x4567 = vdotq_lane_s32(vacc2x4567, vb0123x4567, va2x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
130 vacc3x4567 = vdotq_lane_s32(vacc3x4567, vb0123x4567, va3x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
162 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() local
168 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
172 vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
176 vacc2x4567 = vdotq_lane_s32(vacc2x4567, vb0123x4567, va2x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
180 vacc3x4567 = vdotq_lane_s32(vacc3x4567, vb0123x4567, va3x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
D1x16c4-minmax-neondot.c69 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot() local
79 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot()
96 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot() local
102 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot()