Lines Matching refs:p0
138 ; p0 = clip(p0 + clip((q0 - p0 + ((p1 - q1) >> 2) + 1) >> 1, -iTc, iTc), 0, 255)
139 ; q0 = clip(q0 - clip((q0 - p0 + ((p1 - q1) >> 2) + 1) >> 1, -iTc, iTc), 0, 255)
142 ; p1=%1 p0=%2 q0=%3 q1=%4 iTc=%5 FFh=%6 xmmclobber=%7,%8
144 ; (q0 - p0 + ((p1 - q1) >> 2) + 1) >> 1 clipped to [-96, 159] and biased to [0, 255].
148 ; q0 - p0 is split into a non-negative and non-positive part. The latter is
151 psubusb %7, %3 ; clip(p0 - q0, 0, 255)
156 psubusb %1, %7 ; -= clip(p0 - q0, 0, 255) saturate.
158 psubusb %8, %2 ; (clip(q0 - p0, 0, 255)
159 pavgb %8, %1 ; + clip(((p1 - q1 + 0x300) >> 2) - clip(p0 - q0, 0, 255), 0, 255) + 1) >> 1
163 ; Add/subtract each part to/from p0/q0 and clip.
170 paddusb %2, %8 ; p0
199 MOVDQ xmm6, [r3 + 0 * r1] ; p0
203 SSE2_AbsDiffUB xmm6, xmm0, xmm3 ; |p0 - q0|
204 SSE2_CmpltUB xmm6, xmm1, [pic(WELS_DB127_16)] ; bDeltaP0Q0 = |p0 - q0| < iAlpha
206 SSE2_AbsDiffUB xmm7, xmm4, xmm3 ; |p1 - p0|
208 pmaxub xmm7, xmm0 ; max(|p1 - p0|, |q1 - q0|)
209 …SSE2_CmpltUB xmm7, xmm2, [pic(WELS_DB127_16)] ; bDeltaP1P0 & bDeltaQ1Q0 = max(|p1 - p0|, |q1 - q0…
213 SSE2_AbsDiffUB xmm7, xmm4, xmm3 ; |p2 - p0|
214 SSE2_CmpltUB xmm7, xmm2, [pic(WELS_DB127_16)] ; bDeltaP2P0 = |p2 - p0| < iBeta
224 ; (p2 + ((p0 + q0 + 1) >> 1)) >> 1
228 ; (q2 + ((p0 + q0 + 1) >> 1)) >> 1
254 MOVDQ xmm1, [r3 + 0 * r1] ; p0
257 MOVDQ [r3 + 0 * r1], xmm1 ; store p0.
268 ; Compose 8-bit averages from pavgbs. Ie. (p1 + p0 + p2 + q0 + 2) >> 2 can be
269 ; written as (((p1 + p0) >> 1) + ((p2 + q0 + (p1 ^ p0 & 1)) >> 1) + 1) >> 1,
272 ; pPix=%1 iStride=%2 [in:q0,out:p0]=%3 [in:q1,out:p1]=%4 bDeltaP0Q0P1P0Q1Q0=%5 bDeltaP2P0=%6 clobbe…
282 SSE2_AvgbFloor1 %10, [%1], %12, %8 ; (p0 + p1) >> 1, p0 ^ p1
284 SSE2_AvgbFloor1 %7, %4, %8, %9 ; (t0 + t1 + (p0 ^ p1 & 1)) >> 1
286 SSE2_AvgbFloor1 %3, %9, %8, %4 ; (p2 + q0 + (p0 ^ p1 & 1)) >> 1
287 pavgb %7, %10 ; p0' = (p0 + p1 + t0 + t1 + 2) >> 2
291 pavgb %10, %3 ; p1' = (p0 + p1 + p2 + q0 + 2) >> 2
294 MOVDQ %3, [%1 + 0 * %2] ; p0
296 SSE2_Blend %7, %3, %4 ; p0out = bDeltaP0Q0P1P0Q1Q0 ? p0' : p0
298 SSE2_Blend %7, [%1 + 0 * %2], %5 ; p0out = bDeltaP0Q0P1P0Q1Q0 ? p0' : p0
300 MOVDQ [%1 + 0 * %2], %7 ; store p0
303 psubb %10, %8 ; (p0 + p1 + p2 + q0) >> 2
306 …SSE2_AvgbFloor2 %4, %9, %8 ; (p2 + p3 + ((p0 + p1) >> 1 ^ (p2 + q0 + (p0 ^ p…
349 MOVDQ xmm6, [r3 + 0 * r1] ; p0
352 SSE2_AbsDiffUB xmm6, xmm0, xmm5 ; |p0 - q0|
353 SSE2_CmpgeUB xmm3, xmm6 ; |p0 - q0| < (iAlpha >> 2) + 2
354 SSE2_CmpltUB xmm6, xmm1, [pic(WELS_DB127_16)] ; bDeltaP0Q0 = |p0 - q0| < iAlpha
356 SSE2_AbsDiffUB xmm7, xmm4, xmm5 ; |p1 - p0|
358 pmaxub xmm7, xmm0 ; max(|p1 - p0|, |q1 - q0|)
359 …SSE2_CmpltUB xmm7, xmm2, [pic(WELS_DB127_16)] ; bDeltaP1P0 & bDeltaQ1Q0 = max(|p1 - p0|, |q1 - q0…
363 SSE2_AbsDiffUB xmm7, xmm4, xmm5 ; |p2 - p0|
364 SSE2_CmpltUB xmm7, xmm2, [pic(WELS_DB127_16)] ; bDeltaP2P0 = |p2 - p0| < iBeta
365 pand xmm7, xmm3 ; &= |p0 - q0| < (iAlpha >> 2) + 2
371 pand xmm5, xmm3 ; &= |p0 - q0| < (iAlpha >> 2) + 2
397 ; [out:p1,p0,q0,q1]=%1,%2,%3,%4 pPixCb=%5 pPixCr=%6 iStride=%7 3*iStride-1=%8 xmmclobber=%9,%10,%11
399 movd %1, [%5 + 0 * %7 - 2] ; [p1,p0,q0,q1] cb line 0
400 movd %2, [%5 + 2 * %7 - 2] ; [p1,p0,q0,q1] cb line 2
401 punpcklbw %1, %2 ; [p1,p1,p0,p0,q0,q0,q1,q1] cb line 0,2
402 movd %2, [%5 + 4 * %7 - 2] ; [p1,p0,q0,q1] cb line 4
403 movd %9, [%5 + 2 * %8] ; [p1,p0,q0,q1] cb line 6
404 punpcklbw %2, %9 ; [p1,p1,p0,p0,q0,q0,q1,q1] cb line 4,6
405 …punpcklwd %1, %2 ; [p1,p1,p1,p1,p0,p0,p0,p0,q0,q0,q0,q0,q1,q1,q1,q1] cb line 0,2…
406 movd %2, [%6 + 0 * %7 - 2] ; [p1,p0,q0,q1] cr line 0
407 movd %9, [%6 + 2 * %7 - 2] ; [p1,p0,q0,q1] cr line 2
408 punpcklbw %2, %9 ; [p1,p1,p0,p0,q0,q0,q1,q1] cr line 0,2
409 movd %9, [%6 + 4 * %7 - 2] ; [p1,p0,q0,q1] cr line 4
410 movd %10, [%6 + 2 * %8] ; [p1,p0,q0,q1] cr line 6
411 punpcklbw %9, %10 ; [p1,p1,p0,p0,q0,q0,q1,q1] cr line 4,6
412 …punpcklwd %2, %9 ; [p1,p1,p1,p1,p0,p0,p0,p0,q0,q0,q0,q0,q1,q1,q1,q1] cr line 0,2…
415 movd %9, [%5 + 0 * %7 - 2] ; [p1,p0,q0,q1] cb line 1
416 movd %10, [%5 + 2 * %7 - 2] ; [p1,p0,q0,q1] cb line 3
417 punpcklbw %9, %10 ; [p1,p1,p0,p0,q0,q0,q1,q1] cb line 1,3
418 movd %10, [%5 + 4 * %7 - 2] ; [p1,p0,q0,q1] cb line 5
419 movd %3, [%5 + 2 * %8] ; [p1,p0,q0,q1] cb line 7
420 punpcklbw %10, %3 ; [p1,p1,p0,p0,q0,q0,q1,q1] cb line 5,7
421 …punpcklwd %9, %10 ; [p1,p1,p1,p1,p0,p0,p0,p0,q0,q0,q0,q0,q1,q1,q1,q1] cb line 1,3…
422 movd %10, [%6 + 0 * %7 - 2] ; [p1,p0,q0,q1] cr line 1
423 movd %3, [%6 + 2 * %7 - 2] ; [p1,p0,q0,q1] cr line 3
424 punpcklbw %10, %3 ; [p1,p1,p0,p0,q0,q0,q1,q1] cr line 1,3
425 movd %3, [%6 + 4 * %7 - 2] ; [p1,p0,q0,q1] cr line 5
426 movd %4, [%6 + 2 * %8] ; [p1,p0,q0,q1] cr line 7
427 punpcklbw %3, %4 ; [p1,p1,p0,p0,q0,q0,q1,q1] cr line 5,7
428 …punpcklwd %10, %3 ; [p1,p1,p1,p1,p0,p0,p0,p0,q0,q0,q0,q0,q1,q1,q1,q1] cr line 1,3…
430 …punpckldq %1, %2 ; [p1,p1,p1,p1,p1,p1,p1,p1,p0,p0,p0,p0,p0,p0,p0,p0] cb/cr line …
433 …punpckldq %9, %10 ; [p1,p1,p1,p1,p1,p1,p1,p1,p0,p0,p0,p0,p0,p0,p0,p0] cb/cr line …
437 …punpckhqdq %2, %9 ; [p0,p0,p0,p0,p0,p0,p0,p0,p0,p0,p0,p0,p0,p0,p0,p0] cb/cr line …
443 ; pPixCb+iStride=%1 pPixCr+iStride=%2 iStride=%3 3*iStride-1=%4 p0=%5 q0=%6 rclobber=%7 dwclobber={…
446 …punpcklbw %10, %6 ; [p0,q0,p0,q0,p0,q0,p0,q0,p0,q0,p0,q0,p0,q0,p0,q0] cb/cr line …
447 …punpckhbw %5, %6 ; [p0,q0,p0,q0,p0,q0,p0,q0,p0,q0,p0,q0,p0,q0,p0,q0] cb/cr line …
451 …movdqa [r7 ], %10 ; store [p0,q0,p0,q0,p0,q0,p0,q0,p0,q0,p0,q0,p0,q0,p0,q0] cb/cr…
452 …movdqa [r7 + 16], %5 ; store [p0,q0,p0,q0,p0,q0,p0,q0,p0,q0,p0,q0,p0,q0,p0,q0] cb/cr…
453 mov %8, [r7 + 16] ; [p0,q0,p0,q0] cb line 1,3
454 mov [%1 + 0 * %3 - 1], %9 ; store [p0,q0] cb line 1
455 shr %8, 16 ; [p0,q0] cb line 3
456 mov [%1 + 2 * %3 - 1], %9 ; store [p0,q0] cb line 3
457 mov %8, [r7 + 20] ; [p0,q0,p0,q0] cb line 5,7
458 mov [%1 + 4 * %3 - 1], %9 ; store [p0,q0] cb line 5
459 shr %8, 16 ; [p0,q0] cb line 7
460 mov [%1 + 2 * %4 + 1], %9 ; store [p0,q0] cb line 7
461 mov %8, [r7 + 24] ; [p0,q0,p0,q0] cr line 1,3
462 mov [%2 + 0 * %3 - 1], %9 ; store [p0,q0] cr line 1
463 shr %8, 16 ; [p0,q0] cr line 3
464 mov [%2 + 2 * %3 - 1], %9 ; store [p0,q0] cr line 3
465 mov %8, [r7 + 28] ; [p0,q0,p0,q0] cr line 5,7
466 mov [%2 + 4 * %3 - 1], %9 ; store [p0,q0] cr line 5
467 shr %8, 16 ; [p0,q0] cr line 7
468 mov [%2 + 2 * %4 + 1], %9 ; store [p0,q0] cr line 7
471 mov %8, [r7 ] ; [p0,q0,p0,q0] cb line 0,2
472 mov [%1 + 0 * %3 - 1], %9 ; store [p0,q0] cb line 0
473 shr %8, 16 ; [p0,q0] cb line 2
474 mov [%1 + 2 * %3 - 1], %9 ; store [p0,q0] cb line 2
475 mov %8, [r7 + 4] ; [p0,q0,p0,q0] cb line 4,6
476 mov [%1 + 4 * %3 - 1], %9 ; store [p0,q0] cb line 4
477 shr %8, 16 ; [p0,q0] cb line 6
478 mov [%1 + 2 * %4 + 1], %9 ; store [p0,q0] cb line 6
479 mov %8, [r7 + 8] ; [p0,q0,p0,q0] cr line 0,2
480 mov [%2 + 0 * %3 - 1], %9 ; store [p0,q0] cr line 0
481 shr %8, 16 ; [p0,q0] cr line 2
482 mov [%2 + 2 * %3 - 1], %9 ; store [p0,q0] cr line 2
483 mov %8, [r7 + 12] ; [p0,q0,p0,q0] cr line 4,6
484 mov [%2 + 4 * %3 - 1], %9 ; store [p0,q0] cr line 4
485 shr %8, 16 ; [p0,q0] cr line 6
486 mov [%2 + 2 * %4 + 1], %9 ; store [p0,q0] cr line 6
490 ; p1=%1 p0=%2 q0=%3 q1=%4 iAlpha=%5 iBeta=%6 pTC=%7 xmmclobber=%8,%9,%10 interleaveTC=%11
493 SSE2_AbsDiffUB %8, %2, %9 ; |p0 - q0|
494 SSE2_CmpgeUB %8, %5 ; !bDeltaP0Q0 = |p0 - q0| >= iAlpha
498 SSE2_AbsDiffUB %10, %2, %5 ; |p1 - p0|
499 pmaxub %9, %10 ; max(|q1 - q0|, |p1 - p0|)
503 …SSE2_CmpgeUB %9, %5 ; !bDeltaQ1Q0 | !bDeltaP1P0 = max(|q1 - q0|, |p1 - p0|) >= iBe…
520 ; p1=%1 p0=%2 q0=%3 q1=%4 iAlpha=%5 iBeta=%6 xmmclobber=%7,%8,%9
523 SSE2_AbsDiffUB %7, %2, %8 ; |p0 - q0|
524 SSE2_CmpgeUB %7, %5 ; !bDeltaP0Q0 = |p0 - q0| >= iAlpha
528 SSE2_AbsDiffUB %9, %2, %5 ; |p1 - p0|
529 pmaxub %8, %9 ; max(|q1 - q0|, |p1 - p0|)
533 … SSE2_CmpgeUB %8, %5 ; !bDeltaQ1Q0 | !bDeltaP1P0 = max(|q1 - q0|, |p1 - p0|) >= iBeta
537 SSE2_AvgbFloor1 %8, %4, %5, %9 ; (p0 + q1) >> 1
538 pavgb %8, %1 ; p0' = (p1 + ((p0 + q1) >> 1) + 1) >> 1
540 SSE2_Blend %2, %8, %7 ; p0out = bDeltaP0Q0P1P0Q1Q0 ? p0' : p0
566 movq xmm2, [r0 + 1 * r3] ; p0 cb
567 movhps xmm2, [r1 + 1 * r3] ; p0 cr
580 movlps [r0 + 1 * r3], xmm2 ; store p0 cb
581 movhps [r1 + 1 * r3], xmm2 ; store p0 cr
609 movq xmm2, [r0 + 1 * r3] ; p0 cb
610 movhps xmm2, [r1 + 1 * r3] ; p0 cr
618 movlps [r0 + 1 * r3], xmm2 ; store p0 cb
619 movhps [r1 + 1 * r3], xmm2 ; store p0 cr