Home
last modified time | relevance | path

Searched refs:vprod0x4 (Results 1 – 25 of 48) sorted by relevance

12

/external/XNNPACK/src/qs8-gemm/gen/
D1x8c8-minmax-neon-mlal-padal.c85 int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
86 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
87 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
121 const int16x8_t vprod0x4 = vmull_s8(vb4, va0); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
122 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
D1x8c16-minmax-neon-mlal-padal.c79 int16x8_t vprod0x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal() local
80 vprod0x4 = vmlal_s8(vprod0x4, vget_high_s8(vb4), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
81 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
D2x8c8-minmax-neon-mlal-padal.c113 int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
115 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
117 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
170 const int16x8_t vprod0x4 = vmull_s8(vb4, va0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
172 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
D1x16c8-minmax-neon-mlal-padal.c101 int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
102 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
103 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
169 const int16x8_t vprod0x4 = vmull_s8(vb4, va0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
170 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
D3x8c8-minmax-neon-mlal-padal.c141 int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
144 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
147 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
219 const int16x8_t vprod0x4 = vmull_s8(vb4, va0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
222 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D2x8c16-minmax-neon-mlal-padal.c106 int16x8_t vprod0x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
108 vprod0x4 = vmlal_s8(vprod0x4, vget_high_s8(vb4), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
110 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
D1x16c16-minmax-neon-mlal-padal.c95 int16x8_t vprod0x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() local
96 vprod0x4 = vmlal_s8(vprod0x4, vget_high_s8(vb4), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
97 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
D1x8c8-minmax-neon-mull-padal.c72 const int16x8_t vprod0x4 = vmull_s8(vb4, va0); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal() local
73 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal()
D3x8c16-minmax-neon-mlal-padal.c133 int16x8_t vprod0x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
136 vprod0x4 = vmlal_s8(vprod0x4, vget_high_s8(vb4), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
139 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
D4x8c8-minmax-neon-mlal-padal.c169 int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
173 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
177 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
268 const int16x8_t vprod0x4 = vmull_s8(vb4, va0); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
272 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
D2x16c8-minmax-neon-mlal-padal.c137 int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
139 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
141 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
250 const int16x8_t vprod0x4 = vmull_s8(vb4, va0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
252 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
D2x16c16-minmax-neon-mlal-padal.c130 int16x8_t vprod0x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
132 vprod0x4 = vmlal_s8(vprod0x4, vget_high_s8(vb4), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
134 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
D4x8c16-minmax-neon-mlal-padal.c160 int16x8_t vprod0x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
164 vprod0x4 = vmlal_s8(vprod0x4, vget_high_s8(vb4), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
168 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
/external/XNNPACK/src/qs8-igemm/gen/
D1x8c8-minmax-neon-mlal-padal.c96 int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
97 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
98 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
132 const int16x8_t vprod0x4 = vmull_s8(vb4, va0); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
133 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
D1x8c16-minmax-neon-mlal-padal.c90 int16x8_t vprod0x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal() local
91 vprod0x4 = vmlal_s8(vprod0x4, vget_high_s8(vb4), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
92 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
D2x8c8-minmax-neon-mlal-padal.c126 int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
128 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
130 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
183 const int16x8_t vprod0x4 = vmull_s8(vb4, va0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
185 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
D1x16c8-minmax-neon-mlal-padal.c112 int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
113 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
114 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
180 const int16x8_t vprod0x4 = vmull_s8(vb4, va0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
181 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
D2x8c16-minmax-neon-mlal-padal.c119 int16x8_t vprod0x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
121 vprod0x4 = vmlal_s8(vprod0x4, vget_high_s8(vb4), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
123 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
D1x16c16-minmax-neon-mlal-padal.c106 int16x8_t vprod0x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() local
107 vprod0x4 = vmlal_s8(vprod0x4, vget_high_s8(vb4), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
108 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
D3x8c8-minmax-neon-mlal-padal.c156 int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
159 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
162 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
234 const int16x8_t vprod0x4 = vmull_s8(vb4, va0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
237 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D1x8c8-minmax-neon-mull-padal.c83 const int16x8_t vprod0x4 = vmull_s8(vb4, va0); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal() local
84 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal()
D3x8c16-minmax-neon-mlal-padal.c148 int16x8_t vprod0x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
151 vprod0x4 = vmlal_s8(vprod0x4, vget_high_s8(vb4), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
154 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
D2x16c8-minmax-neon-mlal-padal.c150 int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
152 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
154 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
263 const int16x8_t vprod0x4 = vmull_s8(vb4, va0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
265 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
D4x8c8-minmax-neon-mlal-padal.c186 int16x8_t vprod0x4 = vmull_s8(vb4x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
190 vprod0x4 = vmlal_s8(vprod0x4, vb4x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
194 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
285 const int16x8_t vprod0x4 = vmull_s8(vb4, va0); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
289 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
D4x8c16-minmax-neon-mlal-padal.c177 int16x8_t vprod0x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
181 vprod0x4 = vmlal_s8(vprod0x4, vget_high_s8(vb4), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
185 vacc0x4 = vpadalq_s16(vacc0x4, vprod0x4); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()

12