Lines Matching refs:sz
54 .macro add_sz dst1, dst2, in1, in2, in3, in4, sz argument
56 .ifc \sz, .16b
61 .macro sub_sz dst1, dst2, in1, in2, in3, in4, sz argument
63 .ifc \sz, .16b
68 .macro uaddw_sz dst1, dst2, in1, in2, in3, sz argument
70 .ifc \sz, .16b
75 .macro usubw_sz dst1, dst2, in1, in2, in3, sz argument
77 .ifc \sz, .16b
82 .macro usubl_sz dst1, dst2, in1, in2, sz argument
84 .ifc \sz, .16b
89 .macro sqxtn_sz dst, in1, in2, sz argument
91 .ifc \sz, .16b
96 .macro sqxtun_sz dst, in1, in2, sz argument
98 .ifc \sz, .16b
103 .macro mul_sz dst1, dst2, in1, in2, in3, in4, sz argument
105 .ifc \sz, .16b
110 .macro saddw_sz dst1, dst2, in1, in2, in3, sz argument
112 .ifc \sz, .16b
117 .macro ssubw_sz dst1, dst2, in1, in2, in3, sz argument
119 .ifc \sz, .16b
124 .macro uxtl_sz dst1, dst2, in, sz argument
126 .ifc \sz, .16b
131 .macro uaddl_sz dst1, dst2, in1, in2, sz argument
133 .ifc \sz, .16b
138 .macro rshrn_sz dst, in1, in2, shift, sz argument
140 .ifc \sz, .16b
145 .macro ushll_sz dst1, dst2, in, shift, sz argument
147 .ifc \sz, .16b
159 .macro loop_filter wd, sz, mix, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8
161 dup v0\sz, w2 // E
162 dup v2\sz, w3 // I
163 dup v3\sz, w4 // H
176 uabd v4\sz, v20\sz, v21\sz // abs(p3 - p2)
177 uabd v5\sz, v21\sz, v22\sz // abs(p2 - p1)
178 uabd v6\sz, v22\sz, v23\sz // abs(p1 - p0)
179 uabd v7\sz, v24\sz, v25\sz // abs(q0 - q1)
180 uabd \tmp1\sz, v25\sz, v26\sz // abs(q1 - q2)
181 uabd \tmp2\sz, v26\sz, v27\sz // abs(q2 - q3)
182 umax v4\sz, v4\sz, v5\sz
183 umax v5\sz, v6\sz, v7\sz
184 umax \tmp1\sz, \tmp1\sz, \tmp2\sz
185 uabd v6\sz, v23\sz, v24\sz // abs(p0 - q0)
186 umax v4\sz, v4\sz, v5\sz
187 uqadd v6\sz, v6\sz, v6\sz // abs(p0 - q0) * 2
188 uabd v5\sz, v22\sz, v25\sz // abs(p1 - q1)
189 umax v4\sz, v4\sz, \tmp1\sz // max(abs(p3 - p2), ..., abs(q2 - q3))
190 ushr v5\sz, v5\sz, #1
191 cmhs v4\sz, v2\sz, v4\sz // max(abs()) <= I
192 uqadd v6\sz, v6\sz, v5\sz // abs(p0 - q0) * 2 + abs(p1 - q1) >> 1
193 cmhs v5\sz, v0\sz, v6\sz
194 and v4\sz, v4\sz, v5\sz // fm
198 .ifc \sz, .16b
207 movi v0\sz, #1
209 uabd v6\sz, v20\sz, v23\sz // abs(p3 - p0)
210 uabd v2\sz, v21\sz, v23\sz // abs(p2 - p0)
211 uabd v1\sz, v22\sz, v23\sz // abs(p1 - p0)
212 uabd \tmp1\sz, v25\sz, v24\sz // abs(q1 - q0)
213 uabd \tmp2\sz, v26\sz, v24\sz // abs(q2 - q0)
214 uabd \tmp3\sz, v27\sz, v24\sz // abs(q3 - q0)
215 umax v6\sz, v6\sz, v2\sz
216 umax v1\sz, v1\sz, \tmp1\sz
217 umax \tmp2\sz, \tmp2\sz, \tmp3\sz
219 uabd v7\sz, v16\sz, v23\sz // abs(p7 - p0)
220 umax v6\sz, v6\sz, v1\sz
221 uabd v2\sz, v17\sz, v23\sz // abs(p6 - p0)
222 umax v6\sz, v6\sz, \tmp2\sz
223 uabd v1\sz, v18\sz, v23\sz // abs(p5 - p0)
224 cmhs v6\sz, v0\sz, v6\sz // flat8in
225 uabd v8\sz, v19\sz, v23\sz // abs(p4 - p0)
226 and v6\sz, v6\sz, v4\sz // flat8in && fm
227 uabd v9\sz, v28\sz, v24\sz // abs(q4 - q0)
228 bic v4\sz, v4\sz, v6\sz // fm && !flat8in
229 uabd v10\sz, v29\sz, v24\sz // abs(q5 - q0)
230 uabd v11\sz, v30\sz, v24\sz // abs(q6 - q0)
231 uabd v12\sz, v31\sz, v24\sz // abs(q7 - q0)
233 umax v7\sz, v7\sz, v2\sz
234 umax v1\sz, v1\sz, v8\sz
235 umax v9\sz, v9\sz, v10\sz
236 umax v11\sz, v11\sz, v12\sz
244 uabd v5\sz, v22\sz, v23\sz // abs(p1 - p0)
246 umax v7\sz, v7\sz, v1\sz
247 umax v9\sz, v9\sz, v11\sz
249 umax v6\sz, v6\sz, v1\sz
251 uabd v1\sz, v25\sz, v24\sz // abs(q1 - q0)
253 umax v7\sz, v7\sz, v9\sz
255 umax v6\sz, v6\sz, \tmp2\sz
257 usubl_sz \tmp1\().8h, \tmp2\().8h, v22, v25, \sz // p1 - q1
258 umax v5\sz, v5\sz, v1\sz // max(abs(p1 - p0), abs(q1 - q0))
262 usubl_sz \tmp3\().8h, \tmp4\().8h, v24, v23, \sz // q0 - p0
265 cmhs v6\sz, v0\sz, v6\sz // flat8in
270 cmhs v5\sz, v3\sz, v5\sz // !hev
274 and v6\sz, v6\sz, v1.16b
276 and v6\sz, v6\sz, v4\sz // flat8in && fm
278 sqxtn_sz \tmp1, \tmp1\().8h, \tmp2\().8h, \sz // av_clip_int8(p1 - q1)
280 cmhs v7\sz, v0\sz, v7\sz // flat8out
282 bic v4\sz, v4\sz, v6\sz // fm && !flat8in
284 and v5\sz, v5\sz, v4\sz // !hev && fm && !flat8in
286 and v7\sz, v7\sz, v6\sz // flat8out && flat8in && fm
289 …p3\().8h, \tmp4\().8h, \tmp3\().8h, \tmp4\().8h, \tmp5\().8h, \tmp5\().8h, \sz // 3 * (q0 - p0)
290 bic \tmp1\sz, \tmp1\sz, v5\sz // if (!hev) av_clip_int8 = 0
291 movi v2\sz, #4
292 …saddw_sz \tmp3\().8h, \tmp4\().8h, \tmp3\().8h, \tmp4\().8h, \tmp1, \sz // 3 * (q0 - p0)…
293 movi v3\sz, #3
294 sqxtn_sz \tmp1, \tmp3\().8h, \tmp4\().8h, \sz // f
296 bic v6\sz, v6\sz, v7\sz // fm && flat8in && !flat8out
299 sqadd \tmp3\sz, \tmp1\sz, v2\sz // FFMIN(f + 4, 127)
300 sqadd \tmp4\sz, \tmp1\sz, v3\sz // FFMIN(f + 3, 127)
301 uxtl_sz v0.8h, v1.8h, v23, \sz // p0
302 sshr \tmp3\sz, \tmp3\sz, #3 // f1
303 sshr \tmp4\sz, \tmp4\sz, #3 // f2
305 uxtl_sz v2.8h, v3.8h, v24, \sz // q0
306 saddw_sz v0.8h, v1.8h, v0.8h, v1.8h, \tmp4, \sz // p0 + f2
307 ssubw_sz v2.8h, v3.8h, v2.8h, v3.8h, \tmp3, \sz // q0 - f1
308 sqxtun_sz v0, v0.8h, v1.8h, \sz // out p0
309 sqxtun_sz v1, v2.8h, v3.8h, \sz // out q0
310 srshr \tmp3\sz, \tmp3\sz, #1 // f = (f1 + 1) >> 1
311 bit v23\sz, v0\sz, v4\sz // if (fm && !flat8in)
312 bit v24\sz, v1\sz, v4\sz
314 uxtl_sz v0.8h, v1.8h, v22, \sz // p1
315 uxtl_sz v2.8h, v3.8h, v25, \sz // q1
318 .ifc \sz, .16b
322 saddw_sz v0.8h, v1.8h, v0.8h, v1.8h, \tmp3, \sz // p1 + f
323 ssubw_sz v2.8h, v3.8h, v2.8h, v3.8h, \tmp3, \sz // q1 - f
324 sqxtun_sz v0, v0.8h, v1.8h, \sz // out p1
325 sqxtun_sz v2, v2.8h, v3.8h, \sz // out q1
327 .ifc \sz, .16b
331 bit v22\sz, v0\sz, v5\sz // if (!hev && fm && !flat8in)
332 bit v25\sz, v2\sz, v5\sz
337 .ifc \sz, .16b
344 uaddl_sz \tmp1\().8h, \tmp2\().8h, v20, v21, \sz
345 uaddl_sz \tmp3\().8h, \tmp4\().8h, v22, v25, \sz
346 uaddl_sz \tmp5\().8h, \tmp6\().8h, v20, v22, \sz
347 uaddl_sz \tmp7\().8h, \tmp8\().8h, v23, v26, \sz
348 add_sz v0.8h, v1.8h, \tmp1\().8h, \tmp2\().8h, \tmp1\().8h, \tmp2\().8h, \sz
349 uaddw_sz v0.8h, v1.8h, v0.8h, v1.8h, v23, \sz
350 uaddw_sz v0.8h, v1.8h, v0.8h, v1.8h, v24, \sz
351 add_sz v0.8h, v1.8h, v0.8h, v1.8h, \tmp5\().8h, \tmp6\().8h, \sz
352 …sub_sz \tmp3\().8h, \tmp4\().8h, \tmp3\().8h, \tmp4\().8h, \tmp1\().8h, \tmp2\().8h, \sz
353 …sub_sz \tmp7\().8h, \tmp8\().8h, \tmp7\().8h, \tmp8\().8h, \tmp5\().8h, \tmp6\().8h, \sz
354 rshrn_sz v2, v0.8h, v1.8h, #3, \sz // out p2
356 add_sz v0.8h, v1.8h, v0.8h, v1.8h, \tmp3\().8h, \tmp4\().8h, \sz
357 uaddl_sz \tmp1\().8h, \tmp2\().8h, v20, v23, \sz
358 uaddl_sz \tmp3\().8h, \tmp4\().8h, v24, v27, \sz
359 rshrn_sz v3, v0.8h, v1.8h, #3, \sz // out p1
361 add_sz v0.8h, v1.8h, v0.8h, v1.8h, \tmp7\().8h, \tmp8\().8h, \sz
362 …sub_sz \tmp3\().8h, \tmp4\().8h, \tmp3\().8h, \tmp4\().8h, \tmp1\().8h, \tmp2\().8h, \sz
363 uaddl_sz \tmp5\().8h, \tmp6\().8h, v21, v24, \sz
364 uaddl_sz \tmp7\().8h, \tmp8\().8h, v25, v27, \sz
365 rshrn_sz v4, v0.8h, v1.8h, #3, \sz // out p0
367 add_sz v0.8h, v1.8h, v0.8h, v1.8h, \tmp3\().8h, \tmp4\().8h, \sz
368 …sub_sz \tmp7\().8h, \tmp8\().8h, \tmp7\().8h, \tmp8\().8h, \tmp5\().8h, \tmp6\().8h, \sz
369 uaddl_sz \tmp1\().8h, \tmp2\().8h, v22, v25, \sz
370 uaddl_sz \tmp3\().8h, \tmp4\().8h, v26, v27, \sz
371 rshrn_sz v5, v0.8h, v1.8h, #3, \sz // out q0
373 add_sz v0.8h, v1.8h, v0.8h, v1.8h, \tmp7\().8h, \tmp8\().8h, \sz
374 …sub_sz \tmp3\().8h, \tmp4\().8h, \tmp3\().8h, \tmp4\().8h, \tmp1\().8h, \tmp2\().8h, \sz
375 rshrn_sz \tmp5, v0.8h, v1.8h, #3, \sz // out q1
377 add_sz v0.8h, v1.8h, v0.8h, v1.8h, \tmp3\().8h, \tmp4\().8h, \sz
381 bit v21\sz, v2\sz, v6\sz
382 bit v22\sz, v3\sz, v6\sz
383 bit v23\sz, v4\sz, v6\sz
384 rshrn_sz \tmp6, v0.8h, v1.8h, #3, \sz // out q2
385 bit v24\sz, v5\sz, v6\sz
386 bit v25\sz, \tmp5\sz, v6\sz
387 bit v26\sz, \tmp6\sz, v6\sz
391 orr v2\sz, v6\sz, v7\sz
393 .ifc \sz, .16b
406 .ifc \sz, .16b
421 ushll_sz v0.8h, v1.8h, v16, #3, \sz // 8 * v16
422 usubw_sz v0.8h, v1.8h, v0.8h, v1.8h, v16, \sz // 7 * v16
423 uaddw_sz v0.8h, v1.8h, v0.8h, v1.8h, v17, \sz
424 uaddl_sz v8.8h, v9.8h, v17, v18, \sz
425 uaddl_sz v10.8h, v11.8h, v19, v20, \sz
426 add_sz v0.8h, v1.8h, v0.8h, v1.8h, v8.8h, v9.8h, \sz
427 uaddl_sz v8.8h, v9.8h, v16, v17, \sz
428 uaddl_sz v12.8h, v13.8h, v21, v22, \sz
429 add_sz v0.8h, v1.8h, v0.8h, v1.8h, v10.8h, v11.8h, \sz
430 uaddl_sz v10.8h, v11.8h, v18, v25, \sz
431 uaddl_sz v14.8h, v15.8h, v23, v24, \sz
432 sub_sz v10.8h, v11.8h, v10.8h, v11.8h, v8.8h, v9.8h, \sz
433 add_sz v0.8h, v1.8h, v0.8h, v1.8h, v12.8h, v13.8h, \sz
434 add_sz v0.8h, v1.8h, v0.8h, v1.8h, v14.8h, v15.8h, \sz
435 uaddl_sz v12.8h, v13.8h, v16, v18, \sz
436 uaddl_sz v14.8h, v15.8h, v19, v26, \sz
437 rshrn_sz v2, v0.8h, v1.8h, #4, \sz
439 add_sz v0.8h, v1.8h, v0.8h, v1.8h, v10.8h, v11.8h, \sz
440 uaddl_sz v8.8h, v9.8h, v16, v19, \sz
441 uaddl_sz v10.8h, v11.8h, v20, v27, \sz
442 sub_sz v14.8h, v15.8h, v14.8h, v15.8h, v12.8h, v13.8h, \sz
443 bif v2\sz, v17\sz, v7\sz
444 rshrn_sz v3, v0.8h, v1.8h, #4, \sz
446 add_sz v0.8h, v1.8h, v0.8h, v1.8h, v14.8h, v15.8h, \sz
447 uaddl_sz v12.8h, v13.8h, v16, v20, \sz
448 uaddl_sz v14.8h, v15.8h, v21, v28, \sz
449 sub_sz v10.8h, v11.8h, v10.8h, v11.8h, v8.8h, v9.8h, \sz
450 bif v3\sz, v18\sz, v7\sz
451 rshrn_sz v4, v0.8h, v1.8h, #4, \sz
453 add_sz v0.8h, v1.8h, v0.8h, v1.8h, v10.8h, v11.8h, \sz
454 uaddl_sz v8.8h, v9.8h, v16, v21, \sz
455 uaddl_sz v10.8h, v11.8h, v22, v29, \sz
456 sub_sz v14.8h, v15.8h, v14.8h, v15.8h, v12.8h, v13.8h, \sz
457 bif v4\sz, v19\sz, v7\sz
458 rshrn_sz v5, v0.8h, v1.8h, #4, \sz
460 add_sz v0.8h, v1.8h, v0.8h, v1.8h, v14.8h, v15.8h, \sz
461 uaddl_sz v12.8h, v13.8h, v16, v22, \sz
462 uaddl_sz v14.8h, v15.8h, v23, v30, \sz
463 sub_sz v10.8h, v11.8h, v10.8h, v11.8h, v8.8h, v9.8h, \sz
464 bif v5\sz, v20\sz, v7\sz
465 rshrn_sz v6, v0.8h, v1.8h, #4, \sz
467 add_sz v0.8h, v1.8h, v0.8h, v1.8h, v10.8h, v11.8h, \sz
468 uaddl_sz v10.8h, v11.8h, v16, v23, \sz
469 sub_sz v14.8h, v15.8h, v14.8h, v15.8h, v12.8h, v13.8h, \sz
470 uaddl_sz v12.8h, v13.8h, v24, v31, \sz
471 bif v6\sz, v21\sz, v7\sz
472 rshrn_sz v8, v0.8h, v1.8h, #4, \sz
474 add_sz v0.8h, v1.8h, v0.8h, v1.8h, v14.8h, v15.8h, \sz
475 sub_sz v10.8h, v11.8h, v12.8h, v13.8h, v10.8h, v11.8h, \sz
476 uaddl_sz v12.8h, v13.8h, v17, v24, \sz
477 uaddl_sz v14.8h, v15.8h, v25, v31, \sz
478 bif v8\sz, v22\sz, v7\sz
479 rshrn_sz v9, v0.8h, v1.8h, #4, \sz
481 add_sz v0.8h, v1.8h, v0.8h, v1.8h, v10.8h, v11.8h, \sz
482 sub_sz v14.8h, v15.8h, v14.8h, v15.8h, v12.8h, v13.8h, \sz
483 uaddl_sz v12.8h, v13.8h, v26, v31, \sz
484 bif v9\sz, v23\sz, v7\sz
485 rshrn_sz v10, v0.8h, v1.8h, #4, \sz
487 add_sz v0.8h, v1.8h, v0.8h, v1.8h, v14.8h, v15.8h, \sz
488 uaddl_sz v14.8h, v15.8h, v18, v25, \sz
489 uaddl_sz v18.8h, v19.8h, v19, v26, \sz
490 sub_sz v12.8h, v13.8h, v12.8h, v13.8h, v14.8h, v15.8h, \sz
491 uaddl_sz v14.8h, v15.8h, v27, v31, \sz
492 bif v10\sz, v24\sz, v7\sz
493 rshrn_sz v11, v0.8h, v1.8h, #4, \sz
495 add_sz v0.8h, v1.8h, v0.8h, v1.8h, v12.8h, v13.8h, \sz
496 uaddl_sz v12.8h, v13.8h, v20, v27, \sz
497 sub_sz v14.8h, v15.8h, v14.8h, v15.8h, v18.8h, v19.8h, \sz
498 uaddl_sz v18.8h, v19.8h, v28, v31, \sz
499 bif v11\sz, v25\sz, v7\sz
500 sub_sz v18.8h, v19.8h, v18.8h, v19.8h, v12.8h, v13.8h, \sz
501 rshrn_sz v12, v0.8h, v1.8h, #4, \sz
503 add_sz v0.8h, v1.8h, v0.8h, v1.8h, v14.8h, v15.8h, \sz
504 uaddl_sz v14.8h, v15.8h, v21, v28, \sz
505 uaddl_sz v20.8h, v21.8h, v29, v31, \sz
506 bif v12\sz, v26\sz, v7\sz
507 rshrn_sz v13, v0.8h, v1.8h, #4, \sz
509 add_sz v0.8h, v1.8h, v0.8h, v1.8h, v18.8h, v19.8h, \sz
510 sub_sz v20.8h, v21.8h, v20.8h, v21.8h, v14.8h, v15.8h, \sz
511 uaddl_sz v18.8h, v19.8h, v22, v29, \sz
512 uaddl_sz v22.8h, v23.8h, v30, v31, \sz
513 bif v13\sz, v27\sz, v7\sz
514 rshrn_sz v14, v0.8h, v1.8h, #4, \sz
516 add_sz v0.8h, v1.8h, v0.8h, v1.8h, v20.8h, v21.8h, \sz
517 sub_sz v22.8h, v23.8h, v22.8h, v23.8h, v18.8h, v19.8h, \sz
518 bif v14\sz, v28\sz, v7\sz
519 rshrn_sz v15, v0.8h, v1.8h, #4, \sz
521 add_sz v0.8h, v1.8h, v0.8h, v1.8h, v22.8h, v23.8h, \sz
522 bif v15\sz, v29\sz, v7\sz
523 rshrn_sz v17, v0.8h, v1.8h, #4, \sz
524 bif v17\sz, v30\sz, v7\sz