Home
last modified time | relevance | path

Searched refs: vget_low_s8 (Results 1 – 25 of 187), sorted by relevance

Pages: 1 2 3 4 5 6 7 8

/external/XNNPACK/src/qs8-igemm/gen/
D4x16c16-minmax-neon-mlal-padal.c169 int16x8_t vprod0x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
170 int16x8_t vprod1x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
171 int16x8_t vprod2x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
172 int16x8_t vprod3x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va3)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
181 int16x8_t vprod0x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
182 int16x8_t vprod1x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
183 int16x8_t vprod2x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
184 int16x8_t vprod3x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va3)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
193 int16x8_t vprod0x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
194 int16x8_t vprod1x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
[all …]
D3x16c16-minmax-neon-mlal-padal.c144 int16x8_t vprod0x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
145 int16x8_t vprod1x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
146 int16x8_t vprod2x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
153 int16x8_t vprod0x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
154 int16x8_t vprod1x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
155 int16x8_t vprod2x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
162 int16x8_t vprod0x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
163 int16x8_t vprod1x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
164 int16x8_t vprod2x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
171 int16x8_t vprod0x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
[all …]
D4x8c16-minmax-neon-mlal-padal.c129 int16x8_t vprod0x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
130 int16x8_t vprod1x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
131 int16x8_t vprod2x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
132 int16x8_t vprod3x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va3)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
141 int16x8_t vprod0x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
142 int16x8_t vprod1x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
143 int16x8_t vprod2x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
144 int16x8_t vprod3x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va3)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
153 int16x8_t vprod0x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
154 int16x8_t vprod1x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
[all …]
D3x8c16-minmax-neon-mlal-padal.c112 int16x8_t vprod0x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
113 int16x8_t vprod1x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
114 int16x8_t vprod2x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
121 int16x8_t vprod0x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
122 int16x8_t vprod1x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
123 int16x8_t vprod2x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
130 int16x8_t vprod0x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
131 int16x8_t vprod1x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
132 int16x8_t vprod2x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
139 int16x8_t vprod0x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
[all …]
D2x16c16-minmax-neon-mlal-padal.c119 int16x8_t vprod0x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
120 int16x8_t vprod1x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
125 int16x8_t vprod0x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
126 int16x8_t vprod1x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
131 int16x8_t vprod0x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
132 int16x8_t vprod1x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
137 int16x8_t vprod0x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
138 int16x8_t vprod1x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
143 int16x8_t vprod0x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
144 int16x8_t vprod1x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
[all …]
D2x8c16-minmax-neon-mlal-padal.c95 int16x8_t vprod0x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
96 int16x8_t vprod1x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
101 int16x8_t vprod0x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
102 int16x8_t vprod1x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
107 int16x8_t vprod0x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
108 int16x8_t vprod1x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
113 int16x8_t vprod0x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
114 int16x8_t vprod1x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
119 int16x8_t vprod0x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
120 int16x8_t vprod1x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
[all …]
D1x16c16-minmax-neon-mlal-padal.c94 int16x8_t vprod0x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
97 int16x8_t vprod0x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
100 int16x8_t vprod0x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
103 int16x8_t vprod0x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
106 int16x8_t vprod0x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
109 int16x8_t vprod0x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
112 int16x8_t vprod0x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
115 int16x8_t vprod0x7 = vmull_s8(vget_low_s8(vb7), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
118 int16x8_t vprod0x8 = vmull_s8(vget_low_s8(vb8), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
121 int16x8_t vprod0x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
[all …]
D1x8c16-minmax-neon-mlal-padal.c78 int16x8_t vprod0x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
81 int16x8_t vprod0x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
84 int16x8_t vprod0x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
87 int16x8_t vprod0x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
90 int16x8_t vprod0x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
93 int16x8_t vprod0x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
96 int16x8_t vprod0x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
99 int16x8_t vprod0x7 = vmull_s8(vget_low_s8(vb7), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
/external/XNNPACK/src/qs8-gemm/gen/
D4x16c16-minmax-neon-mlal-padal.c152 int16x8_t vprod0x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
153 int16x8_t vprod1x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
154 int16x8_t vprod2x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
155 int16x8_t vprod3x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va3)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
164 int16x8_t vprod0x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
165 int16x8_t vprod1x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
166 int16x8_t vprod2x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
167 int16x8_t vprod3x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va3)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
176 int16x8_t vprod0x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
177 int16x8_t vprod1x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
[all …]
D3x16c16-minmax-neon-mlal-padal.c129 int16x8_t vprod0x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
130 int16x8_t vprod1x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
131 int16x8_t vprod2x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
138 int16x8_t vprod0x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
139 int16x8_t vprod1x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
140 int16x8_t vprod2x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
147 int16x8_t vprod0x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
148 int16x8_t vprod1x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
149 int16x8_t vprod2x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
156 int16x8_t vprod0x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
[all …]
D2x16c16-minmax-neon-mlal-padal.c106 int16x8_t vprod0x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
107 int16x8_t vprod1x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
112 int16x8_t vprod0x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
113 int16x8_t vprod1x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
118 int16x8_t vprod0x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
119 int16x8_t vprod1x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
124 int16x8_t vprod0x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
125 int16x8_t vprod1x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
130 int16x8_t vprod0x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
131 int16x8_t vprod1x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
[all …]
D4x8c16-minmax-neon-mlal-padal.c112 int16x8_t vprod0x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
113 int16x8_t vprod1x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
114 int16x8_t vprod2x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
115 int16x8_t vprod3x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va3)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
124 int16x8_t vprod0x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
125 int16x8_t vprod1x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
126 int16x8_t vprod2x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
127 int16x8_t vprod3x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va3)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
136 int16x8_t vprod0x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
137 int16x8_t vprod1x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
[all …]
D3x8c16-minmax-neon-mlal-padal.c97 int16x8_t vprod0x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
98 int16x8_t vprod1x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
99 int16x8_t vprod2x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
106 int16x8_t vprod0x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
107 int16x8_t vprod1x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
108 int16x8_t vprod2x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
115 int16x8_t vprod0x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
116 int16x8_t vprod1x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
117 int16x8_t vprod2x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
124 int16x8_t vprod0x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
[all …]
D2x8c16-minmax-neon-mlal-padal.c82 int16x8_t vprod0x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
83 int16x8_t vprod1x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
88 int16x8_t vprod0x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
89 int16x8_t vprod1x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
94 int16x8_t vprod0x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
95 int16x8_t vprod1x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
100 int16x8_t vprod0x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
101 int16x8_t vprod1x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
106 int16x8_t vprod0x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
107 int16x8_t vprod1x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
[all …]
D1x16c16-minmax-neon-mlal-padal.c83 int16x8_t vprod0x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
86 int16x8_t vprod0x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
89 int16x8_t vprod0x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
92 int16x8_t vprod0x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
95 int16x8_t vprod0x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
98 int16x8_t vprod0x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
101 int16x8_t vprod0x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
104 int16x8_t vprod0x7 = vmull_s8(vget_low_s8(vb7), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
107 int16x8_t vprod0x8 = vmull_s8(vget_low_s8(vb8), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
110 int16x8_t vprod0x9 = vmull_s8(vget_low_s8(vb9), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
[all …]
D1x8c16-minmax-neon-mlal-padal.c67 int16x8_t vprod0x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
70 int16x8_t vprod0x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
73 int16x8_t vprod0x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
76 int16x8_t vprod0x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
79 int16x8_t vprod0x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
82 int16x8_t vprod0x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
85 int16x8_t vprod0x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
88 int16x8_t vprod0x7 = vmull_s8(vget_low_s8(vb7), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
/external/tensorflow/tensorflow/lite/kernels/internal/optimized/
Ddepthwiseconv_3x3_filter_common.h48 repacked_data.val[0] = vget_low_s8(a.val[0]); in vqtbl4q_s8()
49 repacked_data.val[1] = vget_low_s8(a.val[1]); in vqtbl4q_s8()
50 repacked_data.val[2] = vget_low_s8(a.val[2]); in vqtbl4q_s8()
51 repacked_data.val[3] = vget_low_s8(a.val[3]); in vqtbl4q_s8()
53 vcombine_s8(vtbl4_s8(repacked_data, vget_low_s8(deleted_bit_3)), in vqtbl4q_s8()
62 vcombine_s8(vtbl4_s8(repacked_data, vget_low_s8(deleted_bit_3)), in vqtbl4q_s8()
132 return vdotq_lane_s32(acc, lhs, vreinterpret_s32_s8(vget_low_s8(rhs)), 0); in vdotq_four_lane_s32()
134 return vdotq_lane_s32(acc, lhs, vreinterpret_s32_s8(vget_low_s8(rhs)), 1); in vdotq_four_lane_s32()
148 int32x4_t sum0 = vpaddlq_s16(vmull_s8(vget_low_s8(lhs), vget_low_s8(rhs))); in vdotq_s32()
159 vdup_lane_s32(vreinterpret_s32_s8(vget_low_s8(rhs)), 0)); in vdotq_four_lane_s32()
[all …]
/external/libgav1/libgav1/src/dsp/arm/
Dwarp_neon.cc74 int8x8_t src_row_window = vget_low_s8(src_row_centered); in HorizontalFilter()
77 src_row_window = vget_low_s8(vextq_s8(src_row_centered, src_row_centered, 1)); in HorizontalFilter()
80 src_row_window = vget_low_s8(vextq_s8(src_row_centered, src_row_centered, 2)); in HorizontalFilter()
83 src_row_window = vget_low_s8(vextq_s8(src_row_centered, src_row_centered, 3)); in HorizontalFilter()
86 src_row_window = vget_low_s8(vextq_s8(src_row_centered, src_row_centered, 4)); in HorizontalFilter()
89 src_row_window = vget_low_s8(vextq_s8(src_row_centered, src_row_centered, 5)); in HorizontalFilter()
92 src_row_window = vget_low_s8(vextq_s8(src_row_centered, src_row_centered, 6)); in HorizontalFilter()
95 src_row_window = vget_low_s8(vextq_s8(src_row_centered, src_row_centered, 7)); in HorizontalFilter()
/external/XNNPACK/src/qs8-vaddc/gen/
Dminmax-neon-ld64-x8.c57 vout01234567 = vmax_s8(vout01234567, vget_low_s8(voutput_min)); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8()
59 vout01234567 = vmin_s8(vout01234567, vget_low_s8(voutput_max)); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8()
81 vout01234567 = vmax_s8(vout01234567, vget_low_s8(voutput_min)); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8()
82 vout01234567 = vmin_s8(vout01234567, vget_low_s8(voutput_max)); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8()
Dminmax-neon-ld64-x24.c77 voutGHIJKLMN = vmax_s8(voutGHIJKLMN, vget_low_s8(voutput_min)); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24()
80 voutGHIJKLMN = vmin_s8(voutGHIJKLMN, vget_low_s8(voutput_max)); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24()
103 vout01234567 = vmax_s8(vout01234567, vget_low_s8(voutput_min)); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24()
104 vout01234567 = vmin_s8(vout01234567, vget_low_s8(voutput_max)); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24()
/external/XNNPACK/src/qs8-vadd/gen/
Dminmax-neon-ld64-x8.c57 vout01234567 = vmax_s8(vout01234567, vget_low_s8(voutput_min)); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8()
59 vout01234567 = vmin_s8(vout01234567, vget_low_s8(voutput_max)); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8()
86 vout01234567 = vmax_s8(vout01234567, vget_low_s8(voutput_min)); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8()
87 vout01234567 = vmin_s8(vout01234567, vget_low_s8(voutput_max)); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8()
/external/libvpx/libvpx/vp8/common/arm/neon/
Dloopfiltersimplehorizontaledge_neon.c52 q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7u8)), in vp8_loop_filter_simple_horizontal_edge_neon()
53 vget_low_s8(vreinterpretq_s8_u8(q6u8))); in vp8_loop_filter_simple_horizontal_edge_neon()
65 q2s16 = vaddw_s8(q2s16, vget_low_s8(q4s8)); in vp8_loop_filter_simple_horizontal_edge_neon()
/external/libhevc/common/arm/
Dihevc_sao_edge_offset_class1_chroma.s179 VTBL.8 D12,{D6},D12 @vtbl1_s8(edge_idx_tbl, vget_low_s8(edge_idx))
189 VTBL.8 D12,{D7},D12 @offset = vtbl1_s8(offset_tbl, vget_low_s8(edge_idx))
197 VTBL.8 D22,{D6},D22 @II vtbl1_s8(edge_idx_tbl, vget_low_s8(edge_idx))
209 VTBL.8 D24,{D7},D22 @offset = vtbl1_s8(offset_tbl, vget_low_s8(edge_idx))
214 @VTBL.8 D24,D7,D22 @II offset = vtbl1_s8(offset_tbl, vget_low_s8(edge_idx))
255 VTBL.8 D22,{D6},D22 @vtbl1_s8(edge_idx_tbl, vget_low_s8(edge_idx))
263 @VTBL.8 D24,D7,D22 @offset = vtbl1_s8(offset_tbl, vget_low_s8(edge_idx))
334 VTBL.8 D12,{D6},D12 @vtbl1_s8(edge_idx_tbl, vget_low_s8(edge_idx))
346 @VTBL.8 D12,D7,D12 @offset = vtbl1_s8(offset_tbl, vget_low_s8(edge_idx))
352 VTBL.8 D22,{D6},D22 @II vtbl1_s8(edge_idx_tbl, vget_low_s8(edge_idx))
[all …]
Dihevc_sao_edge_offset_class1.s175 VTBL.8 D12,{D6},D12 @vtbl1_s8(edge_idx_tbl, vget_low_s8(edge_idx))
184 VTBL.8 D12,{D7},D12 @offset = vtbl1_s8(offset_tbl, vget_low_s8(edge_idx))
189 VTBL.8 D22,{D6},D22 @II vtbl1_s8(edge_idx_tbl, vget_low_s8(edge_idx))
202 VTBL.8 D24,{D7},D22 @II offset = vtbl1_s8(offset_tbl, vget_low_s8(edge_idx))
243 VTBL.8 D22,{D6},D22 @vtbl1_s8(edge_idx_tbl, vget_low_s8(edge_idx))
246 VTBL.8 D24,{D7},D22 @offset = vtbl1_s8(offset_tbl, vget_low_s8(edge_idx))
317 VTBL.8 D12,{D6},D12 @vtbl1_s8(edge_idx_tbl, vget_low_s8(edge_idx))
321 VTBL.8 D12,{D7},D12 @offset = vtbl1_s8(offset_tbl, vget_low_s8(edge_idx))
328 VTBL.8 D22,{D6},D22 @II vtbl1_s8(edge_idx_tbl, vget_low_s8(edge_idx))
332 VTBL.8 D24,{D7},D22 @II offset = vtbl1_s8(offset_tbl, vget_low_s8(edge_idx))
[all …]
/external/gemmlowp/standalone/
Dneon-gemm-kernel-benchmark.cc4577 vmull_s8(vget_low_s8(lhs[i]), vget_low_s8(rhs[j])); in Run()
4766 local_acc[i][0] = vmull_s8(vget_low_s8(lhs[i][0]), in Run()
4767 vget_low_s8(rhs[0])); in Run()
4768 local_acc[i][0] = vmlal_s8(local_acc[i][0], vget_low_s8(lhs[i][1]), in Run()
4769 vget_low_s8(rhs[2])); in Run()
4770 local_acc[i][1] = vmull_s8(vget_low_s8(lhs[i][0]), in Run()
4771 vget_low_s8(rhs[1])); in Run()
4773 vget_low_s8(lhs[i][1]), in Run()
4774 vget_low_s8(rhs[3])); in Run()
4776 local_acc[i][0] = vmlal_s8(local_acc[i][0], vget_low_s8(lhs[i][0]), in Run()
[all …]

Pages: 1 2 3 4 5 6 7 8