1 /******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
/**
******************************************************************************
* @file ime_distortion_metrics.c
*
* @brief
*  This file contains definitions of routines that compute distortion
*  between two macro/sub blocks of identical dimensions
*
* @author
*  Ittiam
*
* @par List of Functions:
*  - ime_sub_pel_compute_sad_16x16()
*  - ime_calculate_sad4_prog()
*  - ime_calculate_sad3_prog()
*  - ime_calculate_sad2_prog()
*  - ime_compute_sad_16x16()
*  - ime_compute_sad_16x16_fast()
*  - ime_compute_sad_16x16_ea8()
*  - ime_compute_sad_8x8()
*  - ime_compute_sad_4x4()
*  - ime_compute_sad_16x8()
*  - ime_compute_satqd_16x16_lumainter()
*  - ime_compute_satqd_8x16_chroma()
*  - ime_compute_satqd_16x16_lumaintra()
*
* @remarks
*  None
*
*******************************************************************************
*/
53
54 /*****************************************************************************/
55 /* File Includes */
56 /*****************************************************************************/
57
58 /* System include files */
59 #include <stdio.h>
60 #include <stdlib.h>
61 #include <string.h>
62
63 /* User include files */
64 #include "ime_typedefs.h"
65 #include "ime_defs.h"
66 #include "ime_macros.h"
67 #include "ime_statistics.h"
68 #include "ime_platform_macros.h"
69 #include "ime_distortion_metrics.h"
70
71
72 /*****************************************************************************/
73 /* Function Definitions */
74 /*****************************************************************************/
75
76 /**
77 ******************************************************************************
78 *
79 * @brief computes distortion (SAD) at all subpel points about the src location
80 *
81 * @par Description
82 * This functions computes SAD at all points at a subpel distance from the
83 * current source location.
84 *
85 * @param[in] pu1_src
86 * UWORD8 pointer to the source
87 *
88 * @param[out] pu1_ref_half_x
89 * UWORD8 pointer to half pel buffer
90 *
91 * @param[out] pu1_ref_half_y
92 * UWORD8 pointer to half pel buffer
93 *
94 * @param[out] pu1_ref_half_xy
95 * UWORD8 pointer to half pel buffer
96 *
97 * @param[in] src_strd
98 * integer source stride
99 *
100 * @param[in] ref_strd
101 * integer ref stride
102 *
103 * @param[out] pi4_sad
104 * integer evaluated sad
105 * pi4_sad[0] - half x
106 * pi4_sad[1] - half x - 1
107 * pi4_sad[2] - half y
108 * pi4_sad[3] - half y - 1
109 * pi4_sad[4] - half xy
110 * pi4_sad[5] - half xy - 1
111 * pi4_sad[6] - half xy - strd
112 * pi4_sad[7] - half xy - 1 - strd
113 *
114 * @remarks
115 *
116 ******************************************************************************
117 */
ime_sub_pel_compute_sad_16x16(UWORD8 * pu1_src,UWORD8 * pu1_ref_half_x,UWORD8 * pu1_ref_half_y,UWORD8 * pu1_ref_half_xy,WORD32 src_strd,WORD32 ref_strd,WORD32 * pi4_sad)118 void ime_sub_pel_compute_sad_16x16(UWORD8 *pu1_src,
119 UWORD8 *pu1_ref_half_x,
120 UWORD8 *pu1_ref_half_y,
121 UWORD8 *pu1_ref_half_xy,
122 WORD32 src_strd,
123 WORD32 ref_strd,
124 WORD32 *pi4_sad)
125 {
126 UWORD8 *pu1_ref_half_x_left = pu1_ref_half_x - 1;
127 UWORD8 *pu1_ref_half_y_top = pu1_ref_half_y - ref_strd;
128 UWORD8 *pu1_ref_half_xy_left = pu1_ref_half_xy - 1;
129 UWORD8 *pu1_ref_half_xy_top = pu1_ref_half_xy - ref_strd;
130 UWORD8 *pu1_ref_half_xy_top_left = pu1_ref_half_xy - ref_strd - 1;
131
132 WORD32 row, col;
133
134 memset(pi4_sad, 0, 8 * sizeof(WORD32));
135
136 for(row = 0; row < MB_SIZE; row++)
137 {
138 for(col = 0; col < MB_SIZE; col++)
139 {
140 WORD32 src;
141 WORD32 diff;
142
143 src = pu1_src[col];
144
145 diff = src - pu1_ref_half_x[col];
146 pi4_sad[0] += ABS(diff);
147
148 diff = src - pu1_ref_half_x_left[col];
149 pi4_sad[1] += ABS(diff);
150
151 diff = src - pu1_ref_half_y[col];
152 pi4_sad[2] += ABS(diff);
153
154 diff = src - pu1_ref_half_y_top[col];
155 pi4_sad[3] += ABS(diff);
156
157 diff = src - pu1_ref_half_xy[col];
158 pi4_sad[4] += ABS(diff);
159
160 diff = src - pu1_ref_half_xy_left[col];
161 pi4_sad[5] += ABS(diff);
162
163 diff = src - pu1_ref_half_xy_top[col];
164 pi4_sad[6] += ABS(diff);
165
166 diff = src - pu1_ref_half_xy_top_left[col];
167 pi4_sad[7] += ABS(diff);
168 }
169
170 pu1_src += src_strd;
171
172 pu1_ref_half_x += ref_strd;
173 pu1_ref_half_x_left += ref_strd;
174
175 pu1_ref_half_y += ref_strd;
176 pu1_ref_half_y_top += ref_strd;
177
178 pu1_ref_half_xy += ref_strd;
179 pu1_ref_half_xy_left += ref_strd;
180 pu1_ref_half_xy_top += ref_strd;
181 pu1_ref_half_xy_top_left += ref_strd;
182 }
183 }
184
185 /**
186 *******************************************************************************
187 *
188 * @brief compute sad
189 *
190 * @par Description: This function computes the sad at vertices of diamond grid
191 * centered at reference pointer and at unit distance from it.
192 *
193 * @param[in] pu1_ref
194 * UWORD8 pointer to the reference
195 *
196 * @param[out] pu1_src
197 * UWORD8 pointer to the source
198 *
199 * @param[in] ref_strd
200 * integer reference stride
201 *
202 * @param[in] src_strd
203 * integer source stride
204 *
205 * @param[out] pi4_sad
206 * pointer to integer array evaluated sad
207 *
208 * @returns sad at all evaluated vertexes
209 *
210 * @remarks none
211 *
212 *******************************************************************************
213 */
ime_calculate_sad4_prog(UWORD8 * pu1_ref,UWORD8 * pu1_src,WORD32 ref_strd,WORD32 src_strd,WORD32 * pi4_sad)214 void ime_calculate_sad4_prog(UWORD8 *pu1_ref,
215 UWORD8 *pu1_src,
216 WORD32 ref_strd,
217 WORD32 src_strd,
218 WORD32 *pi4_sad)
219 {
220
221 /* reference ptrs at unit 1 distance in diamond pattern centered at pu1_ref */
222 UWORD8 *left_ptr = pu1_ref - 1;
223 UWORD8 *right_ptr = pu1_ref + 1;
224 UWORD8 *top_ptr = pu1_ref - ref_strd;
225 UWORD8 *bot_ptr = pu1_ref + ref_strd;
226
227 /* temp var */
228 WORD32 count2, count3;
229 UWORD32 u4_ref_buf_offset = ref_strd - MB_SIZE;
230 UWORD32 u4_cur_buf_offset = src_strd - MB_SIZE;
231
232 memset(pi4_sad, 0, 4 * sizeof(WORD32));
233
234 for(count2 = MB_SIZE; count2 > 0; count2--)
235 {
236 for(count3 = MB_SIZE; count3 > 0 ; count3--)
237 {
238 WORD32 src;
239 WORD32 diff;
240
241 src = *pu1_src++;
242
243 diff = src - *left_ptr++;
244 pi4_sad[0] += ABS(diff);
245
246 diff = src - *right_ptr++;
247 pi4_sad[1] += ABS(diff);
248
249 diff = src - *top_ptr++;
250 pi4_sad[2] += ABS(diff);
251
252 diff = src - *bot_ptr++;
253 pi4_sad[3] += ABS(diff);
254 }
255
256 bot_ptr += u4_ref_buf_offset;
257 left_ptr += u4_ref_buf_offset;
258 right_ptr += u4_ref_buf_offset;
259 top_ptr += u4_ref_buf_offset;
260
261 pu1_src += u4_cur_buf_offset;
262 }
263
264 }
265
266 /**
267 *******************************************************************************
268 *
269 * @brief compute sad
270 *
271 * @par Description: This function computes the sad at vertices of diamond grid
272 * centered at reference pointer and at unit distance from it.
273 *
274 * @param[in] pu1_ref1, pu1_ref2, pu1_ref3
275 * UWORD8 pointer to the reference
276 *
277 * @param[out] pu1_src
278 * UWORD8 pointer to the source
279 *
280 * @param[in] ref_strd
281 * integer reference stride
282 *
283 * @param[in] src_strd
284 * integer source stride
285 *
286 * @param[out] pi4_sad
287 * pointer to integer array evaluated sad
288 *
289 * @returns sad at all evaluated vertexes
290 *
291 * @remarks none
292 *
293 *******************************************************************************
294 */
ime_calculate_sad3_prog(UWORD8 * pu1_ref1,UWORD8 * pu1_ref2,UWORD8 * pu1_ref3,UWORD8 * pu1_src,WORD32 ref_strd,WORD32 src_strd,WORD32 * pi4_sad)295 void ime_calculate_sad3_prog(UWORD8 *pu1_ref1,
296 UWORD8 *pu1_ref2,
297 UWORD8 *pu1_ref3,
298 UWORD8 *pu1_src,
299 WORD32 ref_strd,
300 WORD32 src_strd,
301 WORD32 *pi4_sad)
302 {
303 /* temp var */
304 WORD32 i;
305 UWORD32 u4_ref_buf_offset = ref_strd - MB_SIZE;
306 UWORD32 u4_cur_buf_offset = src_strd - MB_SIZE;
307
308 for(i = 16; i > 0; i--)
309 {
310 USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
311 USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
312 USADA8(pu1_src, pu1_ref3, pi4_sad[2]);
313 pu1_src += 4;
314 pu1_ref1 += 4;
315 pu1_ref2 += 4;
316 pu1_ref3 += 4;
317
318 USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
319 USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
320 USADA8(pu1_src, pu1_ref3, pi4_sad[2]);
321 pu1_src += 4;
322 pu1_ref1 += 4;
323 pu1_ref2 += 4;
324 pu1_ref3 += 4;
325
326 USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
327 USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
328 USADA8(pu1_src, pu1_ref3, pi4_sad[2]);
329 pu1_src += 4;
330 pu1_ref1 += 4;
331 pu1_ref2 += 4;
332 pu1_ref3 += 4;
333
334 USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
335 USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
336 USADA8(pu1_src, pu1_ref3, pi4_sad[2]);
337 pu1_src += 4;
338 pu1_ref1 += 4;
339 pu1_ref2 += 4;
340 pu1_ref3 += 4;
341
342 pu1_src += u4_cur_buf_offset;
343 pu1_ref1 += u4_ref_buf_offset;
344 pu1_ref2 += u4_ref_buf_offset;
345 pu1_ref3 += u4_ref_buf_offset;
346 }
347
348 }
349
350 /**
351 *******************************************************************************
352 *
353 * @brief compute sad
354 *
355 * @par Description: This function computes the sad at vertices of diamond grid
356 * centered at reference pointer and at unit distance from it.
357 *
358 * @param[in] pu1_ref1, pu1_ref2
359 * UWORD8 pointer to the reference
360 *
361 * @param[out] pu1_src
362 * UWORD8 pointer to the source
363 *
364 * @param[in] ref_strd
365 * integer reference stride
366 *
367 * @param[in] src_strd
368 * integer source stride
369 *
370 * @param[out] pi4_sad
371 * pointer to integer array evaluated sad
372 *
373 * @returns sad at all evaluated vertexes
374 *
375 * @remarks none
376 *
377 *******************************************************************************
378 */
ime_calculate_sad2_prog(UWORD8 * pu1_ref1,UWORD8 * pu1_ref2,UWORD8 * pu1_src,WORD32 ref_strd,WORD32 src_strd,WORD32 * pi4_sad)379 void ime_calculate_sad2_prog(UWORD8 *pu1_ref1,
380 UWORD8 *pu1_ref2,
381 UWORD8 *pu1_src,
382 WORD32 ref_strd,
383 WORD32 src_strd,
384 WORD32 *pi4_sad)
385 {
386 /* temp var */
387 WORD32 i;
388 UWORD32 u4_ref_buf_offset = ref_strd - MB_SIZE;
389 UWORD32 u4_cur_buf_offset = src_strd - MB_SIZE;
390
391 for(i = 16; i > 0; i--)
392 {
393 USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
394 USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
395 pu1_src += 4;
396 pu1_ref1 += 4;
397 pu1_ref2 += 4;
398
399 USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
400 USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
401 pu1_src += 4;
402 pu1_ref1 += 4;
403 pu1_ref2 += 4;
404
405 USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
406 USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
407 pu1_src += 4;
408 pu1_ref1 += 4;
409 pu1_ref2 += 4;
410
411 USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
412 USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
413 pu1_src += 4;
414 pu1_ref1 += 4;
415 pu1_ref2 += 4;
416
417 pu1_src += u4_cur_buf_offset;
418 pu1_ref1 += u4_ref_buf_offset;
419 pu1_ref2 += u4_ref_buf_offset;
420 }
421
422 }
423
424 /**
425 ******************************************************************************
426 *
427 * @brief computes distortion (SAD) between 2 16x16 blocks
428 *
429 * @par Description
430 * This functions computes SAD between 2 16x16 blocks. There is a provision
431 * for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
432 * compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
433 *
434 * @param[in] pu1_src
435 * UWORD8 pointer to the source
436 *
437 * @param[out] pu1_dst
438 * UWORD8 pointer to the destination
439 *
440 * @param[in] src_strd
441 * integer source stride
442 *
443 * @param[in] dst_strd
444 * integer destination stride
445 *
446 * @param[in] i4_max_sad
447 * integer maximum allowed distortion
448 *
449 * @param[out] pi4_mb_distortion
450 * integer evaluated sad
451 *
452 * @remarks
453 *
454 ******************************************************************************
455 */
ime_compute_sad_16x16(UWORD8 * pu1_src,UWORD8 * pu1_est,WORD32 src_strd,WORD32 est_strd,WORD32 i4_max_sad,WORD32 * pi4_mb_distortion)456 void ime_compute_sad_16x16(UWORD8 *pu1_src,
457 UWORD8 *pu1_est,
458 WORD32 src_strd,
459 WORD32 est_strd,
460 WORD32 i4_max_sad,
461 WORD32 *pi4_mb_distortion)
462 {
463 WORD32 i4_sad = 0;
464 UWORD32 u4_src_offset = src_strd - 16;
465 UWORD32 u4_est_offset = est_strd - 16;
466 UWORD32 i;
467
468 GATHER_16x16_SAD_EE_STATS(gu4_16x16_sad_ee_stats, 16);
469
470 for(i = 16; i > 0; i--)
471 {
472 USADA8(pu1_src, pu1_est, i4_sad);
473 pu1_src += 4;
474 pu1_est += 4;
475
476 USADA8(pu1_src, pu1_est, i4_sad);
477 pu1_src += 4;
478 pu1_est += 4;
479
480 USADA8(pu1_src, pu1_est, i4_sad);
481 pu1_src += 4;
482 pu1_est += 4;
483
484 USADA8(pu1_src, pu1_est, i4_sad);
485 pu1_src += 4;
486 pu1_est += 4;
487
488 /* early exit */
489 if(i4_max_sad < i4_sad)
490 {
491
492 GATHER_16x16_SAD_EE_STATS(gu4_16x16_sad_ee_stats, 16-i);
493
494 *pi4_mb_distortion = i4_sad;
495 return ;
496 }
497 pu1_src += u4_src_offset;
498 pu1_est += u4_est_offset;
499 }
500
501 *pi4_mb_distortion = i4_sad;
502 return ;
503 }
504
505 /**
506 ******************************************************************************
507 *
508 * @brief computes distortion (SAD) between 2 16x16 blocks (fast mode)
509 *
510 * @par Description
511 * This functions computes SAD between 2 16x16 blocks. There is a provision
512 * for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
513 * compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
514 *
515 * @param[in] pu1_src
516 * UWORD8 pointer to the source
517 *
518 * @param[out] pu1_dst
519 * UWORD8 pointer to the destination
520 *
521 * @param[in] src_strd
522 * integer source stride
523 *
524 * @param[in] dst_strd
525 * integer destination stride
526 *
527 * @param[in] i4_max_sad
528 * integer maximum allowed distortion
529 *
530 * @param[out] pi4_mb_distortion
531 * integer evaluated sad
532 *
533 * @remarks
534 *
535 ******************************************************************************
536 */
ime_compute_sad_16x16_fast(UWORD8 * pu1_src,UWORD8 * pu1_est,WORD32 src_strd,WORD32 est_strd,WORD32 i4_max_sad,WORD32 * pi4_mb_distortion)537 void ime_compute_sad_16x16_fast(UWORD8 *pu1_src,
538 UWORD8 *pu1_est,
539 WORD32 src_strd,
540 WORD32 est_strd,
541 WORD32 i4_max_sad,
542 WORD32 *pi4_mb_distortion)
543 {
544
545 WORD32 i4_sad = 0;
546 UWORD32 u4_src_offset = 2 * src_strd - 16;
547 UWORD32 u4_est_offset = 2 * est_strd - 16;
548 UWORD32 i;
549
550 UNUSED(i4_max_sad);
551
552 for(i = 16; i > 0; i-= 2)
553 {
554 USADA8(pu1_src, pu1_est, i4_sad);
555 pu1_src += 4;
556 pu1_est += 4;
557
558 USADA8(pu1_src, pu1_est, i4_sad);
559 pu1_src += 4;
560 pu1_est += 4;
561
562 USADA8(pu1_src, pu1_est, i4_sad);
563 pu1_src += 4;
564 pu1_est += 4;
565
566 USADA8(pu1_src, pu1_est, i4_sad);
567 pu1_src += 4;
568 pu1_est += 4;
569
570 pu1_src += u4_src_offset;
571 pu1_est += u4_est_offset;
572 }
573
574 *pi4_mb_distortion = (i4_sad << 1);
575 return ;
576 }
577
578 /**
579 ******************************************************************************
580 *
581 * @brief computes distortion (SAD) between 2 8x8 blocks
582 *
583 * @par Description
584 * This functions computes SAD between 2 8x8 blocks. There is a provision
585 * for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
586 * compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
587 *
588 * @param[in] pu1_src
589 * UWORD8 pointer to the source
590 *
591 * @param[out] pu1_dst
592 * UWORD8 pointer to the destination
593 *
594 * @param[in] src_strd
595 * integer source stride
596 *
597 * @param[in] dst_strd
598 * integer destination stride
599 *
600 * @param[in] u4_max_sad
601 * integer maximum allowed distortion
602 *
603 * @param[out] i4_sad
604 * integer evaluated sad
605 *
606 * @remarks
607 *
608 ******************************************************************************
609 */
610
ime_compute_sad_8x8(UWORD8 * pu1_src,UWORD8 * pu1_est,WORD32 src_strd,WORD32 est_strd,WORD32 i4_max_sad,WORD32 * pi4_mb_distortion)611 void ime_compute_sad_8x8(UWORD8 *pu1_src,
612 UWORD8 *pu1_est,
613 WORD32 src_strd,
614 WORD32 est_strd,
615 WORD32 i4_max_sad,
616 WORD32 *pi4_mb_distortion)
617 {
618 WORD32 i4_sad = 0;
619 UWORD32 u4_src_offset = src_strd - 8;
620 UWORD32 u4_est_offset = est_strd - 8;
621 UWORD32 i, j;
622 WORD16 temp;
623
624 for(i = 8; i > 0; i--)
625 {
626 for(j = 8; j > 0; j--)
627 {
628 /* SAD */
629 temp = *pu1_src++ - *pu1_est++;
630 i4_sad += ABS(temp);
631 }
632 /* early exit */
633 if(i4_max_sad < i4_sad)
634 {
635 *pi4_mb_distortion = i4_sad;
636 return;
637 }
638 pu1_src += u4_src_offset;
639 pu1_est += u4_est_offset;
640 }
641 *pi4_mb_distortion = i4_sad;
642 }
643
644 /**
645 ******************************************************************************
646 *
647 * @brief computes distortion (SAD) between 2 4x4 blocks
648 *
649 * @par Description
650 * This functions computes SAD between 2 4x4 blocks. There is a provision
651 * for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
652 * compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
653 *
654 * @param[in] pu1_src
655 * UWORD8 pointer to the source
656 *
657 * @param[out] pu1_dst
658 * UWORD8 pointer to the destination
659 *
660 * @param[in] src_strd
661 * integer source stride
662 *
663 * @param[in] dst_strd
664 * integer destination stride
665 *
666 * @param[in] u4_max_sad
667 * integer maximum allowed distortion
668 *
669 * @param[out] pi4_mb_distortion
670 * integer evaluated sad
671 *
672 * @remarks
673 *
674 ******************************************************************************
675 */
ime_compute_sad_4x4(UWORD8 * pu1_src,UWORD8 * pu1_est,WORD32 src_strd,WORD32 est_strd,WORD32 i4_max_sad,WORD32 * pi4_mb_distortion)676 void ime_compute_sad_4x4
677 (
678 UWORD8 *pu1_src,
679 UWORD8 *pu1_est,
680 WORD32 src_strd,
681 WORD32 est_strd,
682 WORD32 i4_max_sad,
683 WORD32 *pi4_mb_distortion
684 )
685 {
686 WORD32 i4_sad = 0;
687
688 UNUSED(i4_max_sad);
689
690 USADA8(pu1_src, pu1_est, i4_sad);
691 pu1_src += src_strd;
692 pu1_est += est_strd;
693
694 USADA8(pu1_src, pu1_est, i4_sad);
695 pu1_src += src_strd;
696 pu1_est += est_strd;
697
698 USADA8(pu1_src, pu1_est, i4_sad);
699 pu1_src += src_strd;
700 pu1_est += est_strd;
701
702 USADA8(pu1_src, pu1_est, i4_sad);
703 *pi4_mb_distortion = i4_sad;
704 }
705
706
707 /**
708 ******************************************************************************
709 *
710 * @brief computes distortion (SAD) between 2 16x8 blocks
711 *
712 *
713 * @par Description
714 * This functions computes SAD between 2 16x8 blocks. There is a provision
715 * for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
716 * compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
717 *
718 * @param[in] pu1_src
719 * UWORD8 pointer to the source
720 *
721 * @param[out] pu1_dst
722 * UWORD8 pointer to the destination
723 *
724 * @param[in] src_strd
725 * integer source stride
726 *
727 * @param[in] dst_strd
728 * integer destination stride
729 *
730 * @param[in] u4_max_sad
731 * integer maximum allowed distortion
732 *
733 * @param[out] pi4_mb_distortion
734 * integer evaluated sad
735 *
736 * @remarks
737 *
738 ******************************************************************************
739 */
ime_compute_sad_16x8(UWORD8 * pu1_src,UWORD8 * pu1_est,WORD32 src_strd,WORD32 est_strd,WORD32 i4_max_sad,WORD32 * pi4_mb_distortion)740 void ime_compute_sad_16x8
741 (
742 UWORD8 *pu1_src,
743 UWORD8 *pu1_est,
744 WORD32 src_strd,
745 WORD32 est_strd,
746 WORD32 i4_max_sad,
747 WORD32 *pi4_mb_distortion
748 )
749 {
750 WORD32 i4_sad = 0;
751 UWORD32 u4_src_offset = src_strd - 16;
752 UWORD32 u4_est_offset = est_strd - 16;
753 UWORD32 i, j;
754 WORD16 temp;
755
756 GATHER_16x8_SAD_EE_STATS(gu4_16x8_sad_ee_stats, 8);
757
758 for(i = 8; i > 0; i--)
759 {
760 for(j = 16; j > 0; j--)
761 {
762 /* SAD */
763 temp = *pu1_src++ - *pu1_est++;
764 i4_sad += ABS(temp);
765 }
766 /* early exit */
767 if(i4_max_sad < i4_sad)
768 {
769
770 GATHER_16x8_SAD_EE_STATS(gu4_16x8_sad_ee_stats, 8-i);
771
772 *pi4_mb_distortion = i4_sad;
773
774 return;
775 }
776 pu1_src += u4_src_offset;
777 pu1_est += u4_est_offset;
778 }
779
780 *pi4_mb_distortion = i4_sad;
781 return;
782
783 }
784
785 /**
786 ******************************************************************************
787 *
788 * @brief computes distortion (SAD) between 2 16x16 blocks
789 *
790 * @par Description
791 * This functions computes SAD between 2 16x16 blocks. There is a provision
792 * for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
793 * compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
794 *
795 * @param[in] pu1_src
796 * UWORD8 pointer to the source
797 *
798 * @param[out] pu1_dst
799 * UWORD8 pointer to the destination
800 *
801 * @param[in] src_strd
802 * integer source stride
803 *
804 * @param[in] dst_strd
805 * integer destination stride
806 *
807 * @param[in] i4_max_sad
808 * integer maximum allowed distortion
809 *
810 * @param[out] pi4_mb_distortion
811 * integer evaluated sad
812 *
813 * @remarks
814 *
815 ******************************************************************************
816 */
ime_compute_sad_16x16_ea8(UWORD8 * pu1_src,UWORD8 * pu1_est,WORD32 src_strd,WORD32 est_strd,WORD32 i4_max_sad,WORD32 * pi4_mb_distortion)817 void ime_compute_sad_16x16_ea8(UWORD8 *pu1_src,
818 UWORD8 *pu1_est,
819 WORD32 src_strd,
820 WORD32 est_strd,
821 WORD32 i4_max_sad,
822 WORD32 *pi4_mb_distortion)
823 {
824 WORD32 i4_sad = 0;
825 UWORD32 u4_src_offset = src_strd - 16;
826 UWORD32 u4_est_offset = est_strd - 16;
827 UWORD32 i, j;
828 WORD16 temp;
829 UWORD8 *pu1_src_temp = pu1_src + src_strd;
830 UWORD8 *pu1_est_temp = pu1_est + est_strd;
831
832 for(i = 16; i > 0; i -= 2)
833 {
834 for(j = 16; j > 0; j--)
835 {
836 /* SAD */
837 temp = *pu1_src++ - *pu1_est++;
838 i4_sad += ABS(temp);
839 }
840
841 pu1_src += (u4_src_offset + src_strd);
842 pu1_est += (u4_est_offset + est_strd);
843
844 }
845
846 /* early exit */
847 if(i4_max_sad < i4_sad)
848 {
849 *pi4_mb_distortion = i4_sad;
850 return;
851 }
852
853 pu1_src = pu1_src_temp;
854 pu1_est = pu1_est_temp;
855
856 for(i = 16; i > 0; i -= 2)
857 {
858 for(j = 16; j > 0; j--)
859 {
860 /* SAD */
861 temp = *pu1_src++ - *pu1_est++;
862 i4_sad += ABS(temp);
863 }
864
865 pu1_src += u4_src_offset + src_strd;
866 pu1_est += u4_est_offset + est_strd;
867 }
868
869 *pi4_mb_distortion = i4_sad;
870 return;
871 }
872
873
874 /**
875 *******************************************************************************
876 *
877 * @brief This function computes SAD between two 16x16 blocks
878 * It also computes if the block will be zero after H264 transform and quant for
879 * Intra 16x16 blocks
880 *
881 * @param[in] pu1_src
882 * UWORD8 pointer to the source
883 *
884 * @param[out] pu1_dst
885 * UWORD8 pointer to the destination
886 *
887 * @param[in] src_strd
888 * integer source stride
889 *
890 * @param[in] dst_strd
891 * integer destination stride
892 *
893 * @param[in] pu2_thrsh
894 * Threshold for each element of transofrmed quantized block
895 *
896 * @param[out] pi4_mb_distortion
897 * integer evaluated sad
898 *
899 * @param[out] pu4_is_zero
900 * Poitner to store if the block is zero after transform and quantization
901 *
902 * @remarks
903 *
904 ******************************************************************************
905 */
ime_compute_satqd_16x16_lumainter(UWORD8 * pu1_src,UWORD8 * pu1_est,WORD32 src_strd,WORD32 est_strd,UWORD16 * pu2_thrsh,WORD32 * pi4_mb_distortion,UWORD32 * pu4_is_non_zero)906 void ime_compute_satqd_16x16_lumainter(UWORD8 *pu1_src,
907 UWORD8 *pu1_est,
908 WORD32 src_strd,
909 WORD32 est_strd,
910 UWORD16 *pu2_thrsh,
911 WORD32 *pi4_mb_distortion,
912 UWORD32 *pu4_is_non_zero)
913 {
914 UWORD32 i,j;
915 WORD16 s1,s2,s3,s4,sad_1,sad_2,ls1,ls2,ls3,ls4,ls5,ls6,ls7,ls8;
916 UWORD8 *pu1_src_lp,*pu1_est_lp;
917 UWORD32 sad = 0;
918
919 (*pi4_mb_distortion) = 0;
920 for(i=0;i<4;i++)
921 {
922 for(j=0;j<4;j++)
923 {
924 pu1_src_lp = pu1_src + 4*j;
925 pu1_est_lp = pu1_est + 4*j;
926
927 s1 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
928 s4 = ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
929
930 pu1_src_lp += src_strd;
931 pu1_est_lp += est_strd;
932
933 s2 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
934 s3 = ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
935
936 pu1_src_lp += src_strd;
937 pu1_est_lp += est_strd;
938
939 s2 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
940 s3 += ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
941
942 pu1_src_lp += src_strd;
943 pu1_est_lp += est_strd;
944
945 s1 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
946 s4 += ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
947
948 sad_1 = s1+s2+s3+s4;
949
950 if(sad == 0)
951 {
952 sad_2 = sad_1<<1;
953
954 ls1 = sad_2 -(s2 + s3);
955 ls2 = sad_2 -(s1 + s4);
956 ls3 = sad_2 -(s3 + s4);
957 ls4 = sad_2 -(s3 - (s1<<1));
958 ls5 = sad_2 -(s4 - (s2<<1));
959 ls6 = sad_2 -(s1 + s2);
960 ls7 = sad_2 -(s2 - (s4<<1));
961 ls8 = sad_2 -(s1 - (s3<<1));
962
963 if(
964 pu2_thrsh[8] <= sad_1 ||
965 pu2_thrsh[0] <= ls2 ||
966 pu2_thrsh[1] <= ls1 ||
967 pu2_thrsh[2] <= ls8 ||
968 pu2_thrsh[3] <= ls5 ||
969
970 pu2_thrsh[4] <= ls6 ||
971 pu2_thrsh[5] <= ls3 ||
972 pu2_thrsh[6] <= ls7 ||
973 pu2_thrsh[7] <= ls4
974
975 )sad = 1;
976 }
977 (*pi4_mb_distortion) += sad_1;
978 }
979 pu1_src += (src_strd *4);
980 pu1_est += (est_strd *4);
981 }
982 *pu4_is_non_zero = sad;
983 }
984
985
986 /**
987 ******************************************************************************
988 *
989 * @brief computes distortion (SAD and SAQTD) between 2 16x8 (interleaved) chroma blocks
990 *
991 *
992 * @par Description
993 * This functions computes SAD between2 16x8 chroma blocks(interleaved)
994 * It also checks if the SATDD(Sum of absolute transformed wuqntized differnce beteern the blocks
995 * If SAQTD is zero, it gives back zero
996 * Other wise sad is retrned
997 * There is no provison for early exit
998 *
999 * The transform done here is the transform for chroma blocks in H264
1000 *
1001 * @param[in] pu1_src
1002 * UWORD8 pointer to the source
1003 *
1004 * @param[out] pu1_dst
1005 * UWORD8 pointer to the destination
1006 *
1007 * @param[in] src_strd
1008 * integer source stride
1009 *
1010 * @param[in] dst_strd
1011 * integer destination stride
1012 *
1013 * @param[in] pu2_thrsh
1014 * Threshold for each element of transofrmed quantized block
1015 *
1016 * @param[out] pi4_mb_distortion
1017 * integer evaluated sad
1018 *
1019 * @remarks
1020 * Fucntion code is nit updated.
1021 * Will require debugging and minor modifications
1022 *
1023 ******************************************************************************
1024 */
ime_compute_satqd_8x16_chroma(UWORD8 * pu1_src,UWORD8 * pu1_est,WORD32 src_strd,WORD32 est_strd,WORD32 max_sad,UWORD16 * thrsh)1025 void ime_compute_satqd_8x16_chroma(UWORD8 *pu1_src,
1026 UWORD8 *pu1_est,
1027 WORD32 src_strd,
1028 WORD32 est_strd,
1029 WORD32 max_sad,
1030 UWORD16 *thrsh)
1031 {
1032 WORD32 i,j,plane;
1033 WORD16 s1,s2,s3,s4,sad_1,sad_2,ls1,ls2,ls3,ls4,ls5,ls6,ls7,ls8;
1034 UWORD8 *pu1_src_lp,*pu1_est_lp,*pu1_src_plane,*pu1_est_plane;
1035 WORD32 sad =0;
1036 UNUSED(max_sad);
1037
1038 pu1_src_plane = pu1_src;
1039 pu1_est_plane = pu1_est;
1040
1041 for(plane =0;plane<2;plane++)
1042 {
1043 for(i=0;i<4;i++)
1044 {
1045 for(j=0;j<4;j++)
1046 {
1047 pu1_src_lp = pu1_src + 8*j;
1048 pu1_est_lp = pu1_est + 8*j;
1049
1050 s1 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
1051 s4 = ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2])+ ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
1052
1053 pu1_src_lp += src_strd;
1054 pu1_est_lp += est_strd;
1055
1056 s2 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
1057 s3 = ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2])+ ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
1058
1059 pu1_src_lp += src_strd;
1060 pu1_est_lp += est_strd;
1061
1062 s2 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
1063 s3 += ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2])+ ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
1064
1065 pu1_src_lp += src_strd;
1066 pu1_est_lp += est_strd;
1067
1068 s1 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
1069 s4 += ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2])+ ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
1070
1071 sad_1 = s1+s2+s3+s4;
1072 sad_2 = sad_1<<1;
1073
1074 ls1 = sad_2 -(s2 + s3);
1075 ls2 = sad_2 -(s1 + s4);
1076 ls3 = sad_2 -(s3 + s4);
1077 ls4 = sad_2 -(s3 - (s1<<1));
1078 ls5 = sad_2 -(s4 - (s2<<1));
1079 ls6 = sad_2 -(s1 + s2);
1080 ls7 = sad_2 -(s2 - (s4<<1));
1081 ls8 = sad_2 -(s1 - (s3<<1));
1082
1083 if(
1084 //thrsh[0] > sad_1 && Chroma Dc is checked later
1085 thrsh[1] > ls1 &&
1086 thrsh[2] > sad_1 &&
1087 thrsh[3] > ls2 &&
1088
1089 thrsh[4] > ls3 &&
1090 thrsh[5] > ls4 &&
1091 thrsh[6] > ls3 &&
1092 thrsh[7] > ls5 &&
1093
1094 thrsh[8] > sad_1 &&
1095 thrsh[9] > ls1 &&
1096 thrsh[10]> sad_1 &&
1097 thrsh[11]> ls2 &&
1098
1099 thrsh[12]> ls6 &&
1100 thrsh[13]> ls7 &&
1101 thrsh[14]> ls6 &&
1102 thrsh[15]> ls8
1103 )
1104 {
1105 /*set current sad to be zero*/
1106 }
1107 else
1108 return ;
1109
1110 sad += sad_1;
1111 }
1112 pu1_src += (src_strd *4);
1113 pu1_est += (est_strd *4);
1114 }
1115 if(sad < (thrsh[0]<<1))sad = 0;
1116 else return ;
1117
1118 pu1_src = pu1_src_plane+1;
1119 pu1_est = pu1_est_plane+1;
1120 }
1121 return ;
1122 }
1123
1124
1125 /**
1126 ******************************************************************************
1127 *
1128 * @brief computes distortion (SAD and SAQTD) between 2 16x16 blocks
1129 *
1130 * @par Description
1131 * This functions computes SAD between 2 16x16 blocks.
1132 * It also checks if the SATDD(Sum of absolute transformed wuqntized differnce beteern the blocks
1133 * If SAQTD is zero, it gives back zero
1134 * Other wise sad is retrned
1135 * There is no provison for early exit
1136 *
1137 * The transform done here is the transform for inter 16x16 blocks in H264
1138 *
1139 * @param[in] pu1_src
1140 * UWORD8 pointer to the source
1141 *
1142 * @param[out] pu1_dst
1143 * UWORD8 pointer to the destination
1144 *
1145 * @param[in] src_strd
1146 * integer source stride
1147 *
1148 * @param[in] dst_strd
1149 * integer destination stride
1150 *
1151 * @param[in] pu2_thrsh
1152 * Threshold for each element of transofrmed quantized block
1153 *
1154 * @param[out] pi4_mb_distortion
1155 * integer evaluated sad
1156 *
1157 * @remarks
1158 *
1159 ******************************************************************************
1160 */
ime_compute_satqd_16x16_lumaintra(UWORD8 * pu1_src,UWORD8 * pu1_est,WORD32 src_strd,WORD32 est_strd,WORD32 max_sad,UWORD16 * thrsh,WORD32 * pi4_mb_distortion,UWORD8 * sig_nz_sad)1161 void ime_compute_satqd_16x16_lumaintra(UWORD8 *pu1_src,
1162 UWORD8 *pu1_est,
1163 WORD32 src_strd,
1164 WORD32 est_strd,
1165 WORD32 max_sad,
1166 UWORD16 *thrsh,
1167 WORD32 *pi4_mb_distortion,
1168 UWORD8 *sig_nz_sad)
1169 {
1170 UWORD32 i,j;
1171 WORD16 s1[4],s2[4],s3[4],s4[4],sad[4];
1172 UWORD8 *pu1_src_lp,*pu1_est_lp;
1173 UWORD8 *sig_sad_dc;
1174 UWORD32 nz_sad_sig = 0;
1175 UNUSED(max_sad);
1176 *pi4_mb_distortion =0;
1177
1178 sig_sad_dc = sig_nz_sad;
1179 sig_nz_sad++;
1180
1181 for(i=0;i<4;i++)
1182 {
1183 for(j=0;j<4;j++)
1184 {
1185 pu1_src_lp = pu1_src + 4*j;
1186 pu1_est_lp = pu1_est + 4*j;
1187
1188 s1[j] = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
1189 s4[j] = ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
1190
1191 pu1_src_lp += src_strd;
1192 pu1_est_lp += est_strd;
1193
1194 s2[j] = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
1195 s3[j] = ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
1196
1197 pu1_src_lp += src_strd;
1198 pu1_est_lp += est_strd;
1199
1200 s2[j] += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
1201 s3[j] += ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
1202
1203 pu1_src_lp += src_strd;
1204 pu1_est_lp += est_strd;
1205
1206 s1[j] += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
1207 s4[j] += ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
1208
1209 sad[j] = ((s1[j]+s2[j]+s3[j]+s4[j])<<1);
1210 }
1211
1212 for(j=0;j<4;j++)
1213 {
1214
1215 if(
1216 //thrsh[0] > (sad[j] >> 1) &&Dc goes in the other part
1217 thrsh[1] > (sad[j] -(s2[j] + s3[j])) &&
1218 thrsh[2] > (sad[j]>>1) &&
1219 thrsh[3] > (sad[j] -(s1[j] + s4[j])) &&
1220
1221 thrsh[4] > (sad[j] -(s3[j] + s4[j])) &&
1222 thrsh[5] > (sad[j] -(s3[j] - (s1[j]<<1))) &&
1223 thrsh[6] > (sad[j] -(s3[j] + s4[j])) &&
1224 thrsh[7] > (sad[j] -(s4[j] - (s2[j]<<1))) &&
1225
1226 thrsh[8] > (sad[j]>>1) &&
1227 thrsh[9] > (sad[j] -(s2[j] + s3[j])) &&
1228 thrsh[10]> (sad[j]>>1) &&
1229 thrsh[11]> (sad[j] -(s1[j] + s4[j])) &&
1230
1231 thrsh[12]> (sad[j] -(s1[j] + s2[j])) &&
1232 thrsh[13]> (sad[j] -(s2[j] - (s4[j]<<1))) &&
1233 thrsh[14]> (sad[j] -(s1[j] + s2[j])) &&
1234 thrsh[15]> (sad[j] -(s1[j] - (s3[j]<<1)))
1235 )
1236 {
1237 //sad[j] = 0; /*set current sad to be zero*/
1238 sig_nz_sad[j] = 0;/*Signal that the sad is zero*/
1239 }
1240 else
1241 {
1242 sig_nz_sad[j] = 1;/*signal that sad is non zero*/
1243 nz_sad_sig = 1;
1244 }
1245
1246 (*pi4_mb_distortion) += (sad[j]>>1);
1247 //if((*pi4_mb_distortion) >= max_sad)return; /*return or some thing*/
1248 }
1249
1250 sig_nz_sad += 4;
1251 pu1_src += (src_strd *4);
1252 pu1_est += (est_strd *4);
1253 }
1254
1255 if((*pi4_mb_distortion) < thrsh[0]<<2)
1256 {
1257 *sig_sad_dc = 0;
1258 if(nz_sad_sig == 0)(*pi4_mb_distortion) = 0;
1259 }
1260 else *sig_sad_dc = 1;
1261 }
1262
1263
1264