• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /******************************************************************************
2  *
3  * Copyright (C) 2015 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 
21 /**
22 ******************************************************************************
23 * @file ime_distortion_metrics.c
24 *
25 * @brief
26 *  This file contains definitions of routines that compute distortion
27 *  between two macro/sub blocks of identical dimensions
28 *
29 * @author
30 *  Ittiam
31 *
32 * @par List of Functions:
33 *  - ime_sub_pel_compute_sad_16x16()
34 *  - ime_calculate_sad4_prog()
35 *  - ime_calculate_sad3_prog()
36 *  - ime_calculate_sad2_prog()
37 *  - ime_compute_sad_16x16()
38 *  - ime_compute_sad_16x16_fast()
39 *  - ime_compute_sad_16x16_ea8()
40 *  - ime_compute_sad_8x8()
41 *  - ime_compute_sad_4x4()
42 *  - ime_compute_sad_16x8()
43 *  - ime_compute_satqd_16x16_lumainter()
44 *  - ime_compute_satqd_8x16_chroma()
45 *  - ime_compute_satqd_16x16_lumaintra()
46 *
47 *
48 * @remarks
49 *  None
50 *
51 *******************************************************************************
52 */
53 
54 /*****************************************************************************/
55 /* File Includes                                                             */
56 /*****************************************************************************/
57 
58 /* System include files */
59 #include <stdio.h>
60 #include <stdlib.h>
61 #include <string.h>
62 
63 /* User include files */
64 #include "ime_typedefs.h"
65 #include "ime_defs.h"
66 #include "ime_macros.h"
67 #include "ime_statistics.h"
68 #include "ime_platform_macros.h"
69 #include "ime_distortion_metrics.h"
70 
71 
72 /*****************************************************************************/
73 /* Function Definitions                                                      */
74 /*****************************************************************************/
75 
76 /**
77 ******************************************************************************
78 *
79 * @brief computes distortion (SAD) at all subpel points about the src location
80 *
81 * @par Description
82 *   This functions computes SAD at all points at a subpel distance from the
83 *   current source location.
84 *
85 * @param[in] pu1_src
86 *  UWORD8 pointer to the source
87 *
88 * @param[out] pu1_ref_half_x
89 *  UWORD8 pointer to half pel buffer
90 *
91 * @param[out] pu1_ref_half_y
92 *  UWORD8 pointer to half pel buffer
93 *
94 * @param[out] pu1_ref_half_xy
95 *  UWORD8 pointer to half pel buffer
96 *
97 * @param[in] src_strd
98 *  integer source stride
99 *
100 * @param[in] ref_strd
101 *  integer ref stride
102 *
103 * @param[out] pi4_sad
104 *  integer evaluated sad
105 *  pi4_sad[0] - half x
106 *  pi4_sad[1] - half x - 1
107 *  pi4_sad[2] - half y
108 *  pi4_sad[3] - half y - 1
109 *  pi4_sad[4] - half xy
110 *  pi4_sad[5] - half xy - 1
111 *  pi4_sad[6] - half xy - strd
112 *  pi4_sad[7] - half xy - 1 - strd
113 *
114 * @remarks
115 *
116 ******************************************************************************
117 */
ime_sub_pel_compute_sad_16x16(UWORD8 * pu1_src,UWORD8 * pu1_ref_half_x,UWORD8 * pu1_ref_half_y,UWORD8 * pu1_ref_half_xy,WORD32 src_strd,WORD32 ref_strd,WORD32 * pi4_sad)118 void ime_sub_pel_compute_sad_16x16(UWORD8 *pu1_src,
119                                    UWORD8 *pu1_ref_half_x,
120                                    UWORD8 *pu1_ref_half_y,
121                                    UWORD8 *pu1_ref_half_xy,
122                                    WORD32 src_strd,
123                                    WORD32 ref_strd,
124                                    WORD32 *pi4_sad)
125 {
126     UWORD8 *pu1_ref_half_x_left = pu1_ref_half_x - 1;
127     UWORD8 *pu1_ref_half_y_top = pu1_ref_half_y - ref_strd;
128     UWORD8 *pu1_ref_half_xy_left = pu1_ref_half_xy - 1;
129     UWORD8 *pu1_ref_half_xy_top = pu1_ref_half_xy - ref_strd;
130     UWORD8 *pu1_ref_half_xy_top_left = pu1_ref_half_xy - ref_strd - 1;
131 
132     WORD32 row, col;
133 
134     memset(pi4_sad, 0, 8 * sizeof(WORD32));
135 
136     for(row = 0; row < MB_SIZE; row++)
137     {
138         for(col = 0; col < MB_SIZE; col++)
139         {
140             WORD32 src;
141             WORD32 diff;
142 
143             src = pu1_src[col];
144 
145             diff = src - pu1_ref_half_x[col];
146             pi4_sad[0] += ABS(diff);
147 
148             diff = src - pu1_ref_half_x_left[col];
149             pi4_sad[1] += ABS(diff);
150 
151             diff = src - pu1_ref_half_y[col];
152             pi4_sad[2] += ABS(diff);
153 
154             diff = src - pu1_ref_half_y_top[col];
155             pi4_sad[3] += ABS(diff);
156 
157             diff = src - pu1_ref_half_xy[col];
158             pi4_sad[4] += ABS(diff);
159 
160             diff = src - pu1_ref_half_xy_left[col];
161             pi4_sad[5] += ABS(diff);
162 
163             diff = src - pu1_ref_half_xy_top[col];
164             pi4_sad[6] += ABS(diff);
165 
166             diff = src - pu1_ref_half_xy_top_left[col];
167             pi4_sad[7] += ABS(diff);
168         }
169 
170         pu1_src += src_strd;
171 
172         pu1_ref_half_x += ref_strd;
173         pu1_ref_half_x_left += ref_strd;
174 
175         pu1_ref_half_y += ref_strd;
176         pu1_ref_half_y_top += ref_strd;
177 
178         pu1_ref_half_xy += ref_strd;
179         pu1_ref_half_xy_left += ref_strd;
180         pu1_ref_half_xy_top += ref_strd;
181         pu1_ref_half_xy_top_left += ref_strd;
182     }
183 }
184 
185 /**
186 *******************************************************************************
187 *
188 * @brief compute sad
189 *
190 * @par Description: This function computes the sad at vertices of diamond grid
191 * centered at reference pointer and at unit distance from it.
192 *
193 * @param[in] pu1_ref
194 *  UWORD8 pointer to the reference
195 *
196 * @param[out] pu1_src
197 *  UWORD8 pointer to the source
198 *
199 * @param[in] ref_strd
200 *  integer reference stride
201 *
202 * @param[in] src_strd
203 *  integer source stride
204 *
205 * @param[out] pi4_sad
206 *  pointer to integer array evaluated sad
207 *
208 * @returns  sad at all evaluated vertexes
209 *
210 * @remarks  none
211 *
212 *******************************************************************************
213 */
ime_calculate_sad4_prog(UWORD8 * pu1_ref,UWORD8 * pu1_src,WORD32 ref_strd,WORD32 src_strd,WORD32 * pi4_sad)214 void ime_calculate_sad4_prog(UWORD8 *pu1_ref,
215                              UWORD8 *pu1_src,
216                              WORD32 ref_strd,
217                              WORD32 src_strd,
218                              WORD32 *pi4_sad)
219 {
220 
221     /* reference ptrs at unit 1 distance in diamond pattern centered at pu1_ref */
222     UWORD8 *left_ptr    = pu1_ref - 1;
223     UWORD8 *right_ptr   = pu1_ref + 1;
224     UWORD8 *top_ptr     = pu1_ref - ref_strd;
225     UWORD8 *bot_ptr     = pu1_ref + ref_strd;
226 
227     /* temp var */
228     WORD32 count2, count3;
229     UWORD32 u4_ref_buf_offset = ref_strd - MB_SIZE;
230     UWORD32 u4_cur_buf_offset = src_strd - MB_SIZE;
231 
232     memset(pi4_sad, 0, 4 * sizeof(WORD32));
233 
234     for(count2 = MB_SIZE; count2 > 0; count2--)
235     {
236         for(count3 = MB_SIZE; count3 > 0 ; count3--)
237         {
238             WORD32 src;
239             WORD32 diff;
240 
241             src = *pu1_src++;
242 
243             diff = src - *left_ptr++;
244             pi4_sad[0] += ABS(diff);
245 
246             diff = src - *right_ptr++;
247             pi4_sad[1] += ABS(diff);
248 
249             diff = src - *top_ptr++;
250             pi4_sad[2] += ABS(diff);
251 
252             diff = src - *bot_ptr++;
253             pi4_sad[3]  += ABS(diff);
254         }
255 
256         bot_ptr    += u4_ref_buf_offset;
257         left_ptr   += u4_ref_buf_offset;
258         right_ptr  += u4_ref_buf_offset;
259         top_ptr    += u4_ref_buf_offset;
260 
261         pu1_src += u4_cur_buf_offset;
262     }
263 
264 }
265 
266 /**
267 *******************************************************************************
268 *
269 * @brief compute sad
270 *
271 * @par Description: This function computes the sad at vertices of diamond grid
272 * centered at reference pointer and at unit distance from it.
273 *
274 * @param[in] pu1_ref1, pu1_ref2, pu1_ref3
275 *  UWORD8 pointer to the reference
276 *
277 * @param[out] pu1_src
278 *  UWORD8 pointer to the source
279 *
280 * @param[in] ref_strd
281 *  integer reference stride
282 *
283 * @param[in] src_strd
284 *  integer source stride
285 *
286 * @param[out] pi4_sad
287 *  pointer to integer array evaluated sad
288 *
289 * @returns  sad at all evaluated vertexes
290 *
291 * @remarks  none
292 *
293 *******************************************************************************
294 */
ime_calculate_sad3_prog(UWORD8 * pu1_ref1,UWORD8 * pu1_ref2,UWORD8 * pu1_ref3,UWORD8 * pu1_src,WORD32 ref_strd,WORD32 src_strd,WORD32 * pi4_sad)295 void ime_calculate_sad3_prog(UWORD8 *pu1_ref1,
296                              UWORD8 *pu1_ref2,
297                              UWORD8 *pu1_ref3,
298                              UWORD8 *pu1_src,
299                              WORD32 ref_strd,
300                              WORD32 src_strd,
301                              WORD32 *pi4_sad)
302 {
303     /* temp var */
304     WORD32 i;
305     UWORD32 u4_ref_buf_offset = ref_strd - MB_SIZE;
306     UWORD32 u4_cur_buf_offset = src_strd - MB_SIZE;
307 
308     for(i = 16; i > 0; i--)
309     {
310         USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
311         USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
312         USADA8(pu1_src, pu1_ref3, pi4_sad[2]);
313         pu1_src += 4;
314         pu1_ref1 += 4;
315         pu1_ref2 += 4;
316         pu1_ref3 += 4;
317 
318         USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
319         USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
320         USADA8(pu1_src, pu1_ref3, pi4_sad[2]);
321         pu1_src += 4;
322         pu1_ref1 += 4;
323         pu1_ref2 += 4;
324         pu1_ref3 += 4;
325 
326         USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
327         USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
328         USADA8(pu1_src, pu1_ref3, pi4_sad[2]);
329         pu1_src += 4;
330         pu1_ref1 += 4;
331         pu1_ref2 += 4;
332         pu1_ref3 += 4;
333 
334         USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
335         USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
336         USADA8(pu1_src, pu1_ref3, pi4_sad[2]);
337         pu1_src += 4;
338         pu1_ref1 += 4;
339         pu1_ref2 += 4;
340         pu1_ref3 += 4;
341 
342         pu1_src += u4_cur_buf_offset;
343         pu1_ref1 += u4_ref_buf_offset;
344         pu1_ref2 += u4_ref_buf_offset;
345         pu1_ref3 += u4_ref_buf_offset;
346     }
347 
348 }
349 
350 /**
351 *******************************************************************************
352 *
353 * @brief compute sad
354 *
355 * @par Description: This function computes the sad at vertices of diamond grid
356 * centered at reference pointer and at unit distance from it.
357 *
358 * @param[in] pu1_ref1, pu1_ref2
359 *  UWORD8 pointer to the reference
360 *
361 * @param[out] pu1_src
362 *  UWORD8 pointer to the source
363 *
364 * @param[in] ref_strd
365 *  integer reference stride
366 *
367 * @param[in] src_strd
368 *  integer source stride
369 *
370 * @param[out] pi4_sad
371 *  pointer to integer array evaluated sad
372 *
373 * @returns  sad at all evaluated vertexes
374 *
375 * @remarks  none
376 *
377 *******************************************************************************
378 */
ime_calculate_sad2_prog(UWORD8 * pu1_ref1,UWORD8 * pu1_ref2,UWORD8 * pu1_src,WORD32 ref_strd,WORD32 src_strd,WORD32 * pi4_sad)379 void ime_calculate_sad2_prog(UWORD8 *pu1_ref1,
380                              UWORD8 *pu1_ref2,
381                              UWORD8 *pu1_src,
382                              WORD32 ref_strd,
383                              WORD32 src_strd,
384                              WORD32 *pi4_sad)
385 {
386     /* temp var */
387     WORD32 i;
388     UWORD32 u4_ref_buf_offset = ref_strd - MB_SIZE;
389     UWORD32 u4_cur_buf_offset = src_strd - MB_SIZE;
390 
391     for(i = 16; i > 0; i--)
392     {
393         USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
394         USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
395         pu1_src += 4;
396         pu1_ref1 += 4;
397         pu1_ref2 += 4;
398 
399         USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
400         USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
401         pu1_src += 4;
402         pu1_ref1 += 4;
403         pu1_ref2 += 4;
404 
405         USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
406         USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
407         pu1_src += 4;
408         pu1_ref1 += 4;
409         pu1_ref2 += 4;
410 
411         USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
412         USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
413         pu1_src += 4;
414         pu1_ref1 += 4;
415         pu1_ref2 += 4;
416 
417         pu1_src += u4_cur_buf_offset;
418         pu1_ref1 += u4_ref_buf_offset;
419         pu1_ref2 += u4_ref_buf_offset;
420     }
421 
422 }
423 
424 /**
425 ******************************************************************************
426 *
427 * @brief computes distortion (SAD) between 2 16x16 blocks
428 *
429 * @par   Description
430 *   This functions computes SAD between 2 16x16 blocks. There is a provision
431 *   for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
432 *   compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
433 *
434 * @param[in] pu1_src
435 *  UWORD8 pointer to the source
436 *
437 * @param[out] pu1_dst
438 *  UWORD8 pointer to the destination
439 *
440 * @param[in] src_strd
441 *  integer source stride
442 *
443 * @param[in] dst_strd
444 *  integer destination stride
445 *
446 * @param[in] i4_max_sad
447 *  integer maximum allowed distortion
448 *
449 * @param[out] pi4_mb_distortion
450 *  integer evaluated sad
451 *
452 * @remarks
453 *
454 ******************************************************************************
455 */
ime_compute_sad_16x16(UWORD8 * pu1_src,UWORD8 * pu1_est,WORD32 src_strd,WORD32 est_strd,WORD32 i4_max_sad,WORD32 * pi4_mb_distortion)456 void ime_compute_sad_16x16(UWORD8 *pu1_src,
457                            UWORD8 *pu1_est,
458                            WORD32 src_strd,
459                            WORD32 est_strd,
460                            WORD32 i4_max_sad,
461                            WORD32 *pi4_mb_distortion)
462 {
463     WORD32 i4_sad = 0;
464     UWORD32 u4_src_offset = src_strd - 16;
465     UWORD32 u4_est_offset = est_strd - 16;
466     UWORD32 i;
467 
468 GATHER_16x16_SAD_EE_STATS(gu4_16x16_sad_ee_stats, 16);
469 
470     for(i = 16; i > 0; i--)
471     {
472         USADA8(pu1_src, pu1_est, i4_sad);
473         pu1_src += 4;
474         pu1_est += 4;
475 
476         USADA8(pu1_src, pu1_est, i4_sad);
477         pu1_src += 4;
478         pu1_est += 4;
479 
480         USADA8(pu1_src, pu1_est, i4_sad);
481         pu1_src += 4;
482         pu1_est += 4;
483 
484         USADA8(pu1_src, pu1_est, i4_sad);
485         pu1_src += 4;
486         pu1_est += 4;
487 
488         /* early exit */
489         if(i4_max_sad < i4_sad)
490         {
491 
492 GATHER_16x16_SAD_EE_STATS(gu4_16x16_sad_ee_stats, 16-i);
493 
494             *pi4_mb_distortion = i4_sad;
495             return ;
496         }
497         pu1_src += u4_src_offset;
498         pu1_est += u4_est_offset;
499     }
500 
501     *pi4_mb_distortion = i4_sad;
502     return ;
503 }
504 
505 /**
506 ******************************************************************************
507 *
508 * @brief computes distortion (SAD) between 2 16x16 blocks (fast mode)
509 *
510 * @par   Description
511 *   This functions computes SAD between 2 16x16 blocks. There is a provision
512 *   for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
513 *   compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
514 *
515 * @param[in] pu1_src
516 *  UWORD8 pointer to the source
517 *
518 * @param[out] pu1_dst
519 *  UWORD8 pointer to the destination
520 *
521 * @param[in] src_strd
522 *  integer source stride
523 *
524 * @param[in] dst_strd
525 *  integer destination stride
526 *
527 * @param[in] i4_max_sad
528 *  integer maximum allowed distortion
529 *
530 * @param[out] pi4_mb_distortion
531 *  integer evaluated sad
532 *
533 * @remarks
534 *
535 ******************************************************************************
536 */
ime_compute_sad_16x16_fast(UWORD8 * pu1_src,UWORD8 * pu1_est,WORD32 src_strd,WORD32 est_strd,WORD32 i4_max_sad,WORD32 * pi4_mb_distortion)537 void ime_compute_sad_16x16_fast(UWORD8 *pu1_src,
538                                 UWORD8 *pu1_est,
539                                 WORD32 src_strd,
540                                 WORD32 est_strd,
541                                 WORD32 i4_max_sad,
542                                 WORD32 *pi4_mb_distortion)
543 {
544 
545     WORD32 i4_sad = 0;
546     UWORD32 u4_src_offset = 2 * src_strd - 16;
547     UWORD32 u4_est_offset = 2 * est_strd - 16;
548     UWORD32 i;
549 
550     UNUSED(i4_max_sad);
551 
552     for(i = 16; i > 0; i-= 2)
553     {
554         USADA8(pu1_src, pu1_est, i4_sad);
555         pu1_src += 4;
556         pu1_est += 4;
557 
558         USADA8(pu1_src, pu1_est, i4_sad);
559         pu1_src += 4;
560         pu1_est += 4;
561 
562         USADA8(pu1_src, pu1_est, i4_sad);
563         pu1_src += 4;
564         pu1_est += 4;
565 
566         USADA8(pu1_src, pu1_est, i4_sad);
567         pu1_src += 4;
568         pu1_est += 4;
569 
570         pu1_src += u4_src_offset;
571         pu1_est += u4_est_offset;
572     }
573 
574     *pi4_mb_distortion = (i4_sad << 1);
575     return ;
576 }
577 
578 /**
579 ******************************************************************************
580 *
581 *  @brief computes distortion (SAD) between 2 8x8 blocks
582 *
583 *  @par   Description
584 *   This functions computes SAD between 2 8x8 blocks. There is a provision
585 *   for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
586 *   compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
587 *
588 * @param[in] pu1_src
589 *  UWORD8 pointer to the source
590 *
591 * @param[out] pu1_dst
592 *  UWORD8 pointer to the destination
593 *
594 * @param[in] src_strd
595 *  integer source stride
596 *
597 * @param[in] dst_strd
598 *  integer destination stride
599 *
600 * @param[in] u4_max_sad
601 *  integer maximum allowed distortion
602 *
603 * @param[out] i4_sad
604 *  integer evaluated sad
605 *
606 * @remarks
607 *
608 ******************************************************************************
609  */
610 
ime_compute_sad_8x8(UWORD8 * pu1_src,UWORD8 * pu1_est,WORD32 src_strd,WORD32 est_strd,WORD32 i4_max_sad,WORD32 * pi4_mb_distortion)611 void ime_compute_sad_8x8(UWORD8 *pu1_src,
612                          UWORD8 *pu1_est,
613                          WORD32 src_strd,
614                          WORD32 est_strd,
615                          WORD32 i4_max_sad,
616                          WORD32 *pi4_mb_distortion)
617 {
618     WORD32 i4_sad = 0;
619     UWORD32 u4_src_offset = src_strd - 8;
620     UWORD32 u4_est_offset = est_strd - 8;
621     UWORD32 i, j;
622     WORD16 temp;
623 
624     for(i = 8; i > 0; i--)
625     {
626         for(j = 8; j > 0; j--)
627         {
628             /* SAD */
629             temp = *pu1_src++ - *pu1_est++;
630             i4_sad += ABS(temp);
631         }
632         /* early exit */
633         if(i4_max_sad < i4_sad)
634         {
635             *pi4_mb_distortion = i4_sad;
636             return;
637         }
638         pu1_src += u4_src_offset;
639         pu1_est += u4_est_offset;
640     }
641     *pi4_mb_distortion = i4_sad;
642 }
643 
644 /**
645 ******************************************************************************
646 *
647 *  @brief computes distortion (SAD) between 2 4x4 blocks
648 *
649 *  @par   Description
650 *   This functions computes SAD between 2 4x4 blocks. There is a provision
651 *   for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
652 *   compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
653 *
654 * @param[in] pu1_src
655 *  UWORD8 pointer to the source
656 *
657 * @param[out] pu1_dst
658 *  UWORD8 pointer to the destination
659 *
660 * @param[in] src_strd
661 *  integer source stride
662 *
663 * @param[in] dst_strd
664 *  integer destination stride
665 *
666 * @param[in] u4_max_sad
667 *  integer maximum allowed distortion
668 *
669 * @param[out] pi4_mb_distortion
670 *  integer evaluated sad
671 *
672 * @remarks
673 *
674 ******************************************************************************
675 */
ime_compute_sad_4x4(UWORD8 * pu1_src,UWORD8 * pu1_est,WORD32 src_strd,WORD32 est_strd,WORD32 i4_max_sad,WORD32 * pi4_mb_distortion)676 void ime_compute_sad_4x4
677         (
678             UWORD8 *pu1_src,
679             UWORD8 *pu1_est,
680             WORD32 src_strd,
681             WORD32 est_strd,
682             WORD32 i4_max_sad,
683             WORD32 *pi4_mb_distortion
684         )
685 {
686     WORD32 i4_sad = 0;
687 
688     UNUSED(i4_max_sad);
689 
690     USADA8(pu1_src, pu1_est, i4_sad);
691     pu1_src += src_strd;
692     pu1_est += est_strd;
693 
694     USADA8(pu1_src, pu1_est, i4_sad);
695     pu1_src += src_strd;
696     pu1_est += est_strd;
697 
698     USADA8(pu1_src, pu1_est, i4_sad);
699     pu1_src += src_strd;
700     pu1_est += est_strd;
701 
702     USADA8(pu1_src, pu1_est, i4_sad);
703     *pi4_mb_distortion = i4_sad;
704 }
705 
706 
707 /**
708 ******************************************************************************
709 *
710 *  @brief computes distortion (SAD) between 2 16x8  blocks
711 *
712 *
713 *  @par   Description
714 *   This functions computes SAD between 2 16x8 blocks. There is a provision
715 *   for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
716 *   compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
717 *
718 * @param[in] pu1_src
719 *  UWORD8 pointer to the source
720 *
721 * @param[out] pu1_dst
722 *  UWORD8 pointer to the destination
723 *
724 * @param[in] src_strd
725 *  integer source stride
726 *
727 * @param[in] dst_strd
728 *  integer destination stride
729 *
730 * @param[in] u4_max_sad
731 *  integer maximum allowed distortion
732 *
733 * @param[out] pi4_mb_distortion
734 *  integer evaluated sad
735 *
736 * @remarks
737 *
738 ******************************************************************************
739 */
ime_compute_sad_16x8(UWORD8 * pu1_src,UWORD8 * pu1_est,WORD32 src_strd,WORD32 est_strd,WORD32 i4_max_sad,WORD32 * pi4_mb_distortion)740 void ime_compute_sad_16x8
741         (
742             UWORD8 *pu1_src,
743             UWORD8 *pu1_est,
744             WORD32 src_strd,
745             WORD32 est_strd,
746             WORD32 i4_max_sad,
747             WORD32 *pi4_mb_distortion
748         )
749 {
750     WORD32 i4_sad = 0;
751     UWORD32 u4_src_offset = src_strd - 16;
752     UWORD32 u4_est_offset = est_strd - 16;
753     UWORD32 i, j;
754     WORD16 temp;
755 
756 GATHER_16x8_SAD_EE_STATS(gu4_16x8_sad_ee_stats, 8);
757 
758     for(i = 8; i > 0; i--)
759     {
760         for(j = 16; j > 0; j--)
761         {
762             /* SAD */
763             temp = *pu1_src++ - *pu1_est++;
764             i4_sad += ABS(temp);
765         }
766         /* early exit */
767         if(i4_max_sad < i4_sad)
768         {
769 
770 GATHER_16x8_SAD_EE_STATS(gu4_16x8_sad_ee_stats, 8-i);
771 
772             *pi4_mb_distortion = i4_sad;
773 
774             return;
775         }
776         pu1_src += u4_src_offset;
777         pu1_est += u4_est_offset;
778     }
779 
780     *pi4_mb_distortion = i4_sad;
781     return;
782 
783 }
784 
785 /**
786 ******************************************************************************
787 *
788 * @brief computes distortion (SAD) between 2 16x16 blocks
789 *
790 * @par   Description
791 *   This functions computes SAD between 2 16x16 blocks. There is a provision
792 *   for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
793 *   compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
794 *
795 * @param[in] pu1_src
796 *  UWORD8 pointer to the source
797 *
798 * @param[out] pu1_dst
799 *  UWORD8 pointer to the destination
800 *
801 * @param[in] src_strd
802 *  integer source stride
803 *
804 * @param[in] dst_strd
805 *  integer destination stride
806 *
807 * @param[in] i4_max_sad
808 *  integer maximum allowed distortion
809 *
810 * @param[out] pi4_mb_distortion
811 *  integer evaluated sad
812 *
813 * @remarks
814 *
815 ******************************************************************************
816 */
ime_compute_sad_16x16_ea8(UWORD8 * pu1_src,UWORD8 * pu1_est,WORD32 src_strd,WORD32 est_strd,WORD32 i4_max_sad,WORD32 * pi4_mb_distortion)817 void ime_compute_sad_16x16_ea8(UWORD8 *pu1_src,
818                                UWORD8 *pu1_est,
819                                WORD32 src_strd,
820                                WORD32 est_strd,
821                                WORD32 i4_max_sad,
822                                WORD32 *pi4_mb_distortion)
823 {
824     WORD32 i4_sad = 0;
825     UWORD32 u4_src_offset = src_strd - 16;
826     UWORD32 u4_est_offset = est_strd - 16;
827     UWORD32 i, j;
828     WORD16 temp;
829     UWORD8 *pu1_src_temp = pu1_src + src_strd;
830     UWORD8 *pu1_est_temp = pu1_est + est_strd;
831 
832     for(i = 16; i > 0; i -= 2)
833     {
834         for(j = 16; j > 0; j--)
835         {
836             /* SAD */
837             temp = *pu1_src++ - *pu1_est++;
838             i4_sad += ABS(temp);
839         }
840 
841         pu1_src += (u4_src_offset + src_strd);
842         pu1_est += (u4_est_offset + est_strd);
843 
844     }
845 
846     /* early exit */
847     if(i4_max_sad < i4_sad)
848     {
849         *pi4_mb_distortion = i4_sad;
850         return;
851     }
852 
853     pu1_src = pu1_src_temp;
854     pu1_est = pu1_est_temp;
855 
856     for(i = 16; i > 0; i -= 2)
857     {
858         for(j = 16; j > 0; j--)
859         {
860             /* SAD */
861             temp = *pu1_src++ - *pu1_est++;
862             i4_sad += ABS(temp);
863         }
864 
865         pu1_src += u4_src_offset + src_strd;
866         pu1_est += u4_est_offset + est_strd;
867     }
868 
869     *pi4_mb_distortion = i4_sad;
870     return;
871 }
872 
873 
874 /**
875 *******************************************************************************
876 *
877 * @brief This function computes SAD between two 16x16 blocks
878 *        It also computes if the block will be zero after H264 transform and quant for
879 *        Intra 16x16 blocks
880 *
881 * @param[in] pu1_src
882 *  UWORD8 pointer to the source
883 *
884 * @param[out] pu1_dst
885 *  UWORD8 pointer to the destination
886 *
887 * @param[in] src_strd
888 *  integer source stride
889 *
890 * @param[in] dst_strd
891 *  integer destination stride
892 *
893 * @param[in] pu2_thrsh
894 *  Threshold for each element of transofrmed quantized block
895 *
896 * @param[out] pi4_mb_distortion
897 *  integer evaluated sad
898 *
899 * @param[out] pu4_is_zero
900 *  Poitner to store if the block is zero after transform and quantization
901 *
902 * @remarks
903 *
904 ******************************************************************************
905 */
ime_compute_satqd_16x16_lumainter(UWORD8 * pu1_src,UWORD8 * pu1_est,WORD32 src_strd,WORD32 est_strd,UWORD16 * pu2_thrsh,WORD32 * pi4_mb_distortion,UWORD32 * pu4_is_non_zero)906 void ime_compute_satqd_16x16_lumainter(UWORD8 *pu1_src,
907                                          UWORD8 *pu1_est,
908                                          WORD32 src_strd,
909                                          WORD32 est_strd,
910                                          UWORD16 *pu2_thrsh,
911                                          WORD32 *pi4_mb_distortion,
912                                          UWORD32 *pu4_is_non_zero)
913 {
914     UWORD32 i,j;
915     WORD16 s1,s2,s3,s4,sad_1,sad_2,ls1,ls2,ls3,ls4,ls5,ls6,ls7,ls8;
916     UWORD8 *pu1_src_lp,*pu1_est_lp;
917     UWORD32 sad = 0;
918 
919     (*pi4_mb_distortion) = 0;
920     for(i=0;i<4;i++)
921     {
922         for(j=0;j<4;j++)
923         {
924             pu1_src_lp = pu1_src + 4*j;
925             pu1_est_lp = pu1_est + 4*j;
926 
927             s1 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
928             s4 = ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
929 
930             pu1_src_lp += src_strd;
931             pu1_est_lp += est_strd;
932 
933             s2 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
934             s3 = ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
935 
936             pu1_src_lp += src_strd;
937             pu1_est_lp += est_strd;
938 
939             s2 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
940             s3 += ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
941 
942             pu1_src_lp += src_strd;
943             pu1_est_lp += est_strd;
944 
945             s1 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
946             s4 += ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
947 
948             sad_1 = s1+s2+s3+s4;
949 
950             if(sad == 0)
951             {
952                 sad_2 = sad_1<<1;
953 
954                 ls1 = sad_2 -(s2 + s3);
955                 ls2 = sad_2 -(s1 + s4);
956                 ls3 = sad_2 -(s3 + s4);
957                 ls4 = sad_2 -(s3 - (s1<<1));
958                 ls5 = sad_2 -(s4 - (s2<<1));
959                 ls6 = sad_2 -(s1 + s2);
960                 ls7 = sad_2 -(s2 - (s4<<1));
961                 ls8 = sad_2 -(s1 - (s3<<1));
962 
963                 if(
964                         pu2_thrsh[8] <= sad_1   ||
965                         pu2_thrsh[0] <=  ls2    ||
966                         pu2_thrsh[1] <=  ls1    ||
967                         pu2_thrsh[2] <=  ls8    ||
968                         pu2_thrsh[3] <=  ls5    ||
969 
970                         pu2_thrsh[4] <=  ls6    ||
971                         pu2_thrsh[5] <=  ls3    ||
972                         pu2_thrsh[6] <=  ls7    ||
973                         pu2_thrsh[7] <=  ls4
974 
975                 )sad = 1;
976             }
977             (*pi4_mb_distortion) += sad_1;
978         }
979         pu1_src +=  (src_strd *4);
980         pu1_est +=  (est_strd *4);
981     }
982     *pu4_is_non_zero = sad;
983 }
984 
985 
986 /**
987 ******************************************************************************
988 *
989 * @brief computes distortion (SAD and SAQTD) between 2 16x8 (interleaved) chroma blocks
990 *
991 *
992 * @par   Description
993 *   This functions computes SAD between2 16x8 chroma blocks(interleaved)
994 *   It also checks if the SATDD(Sum of absolute transformed wuqntized differnce beteern the blocks
995 *   If SAQTD is zero, it gives back zero
996 *   Other wise sad is retrned
997 *   There is no provison for early exit
998 *
999 *   The transform done here is the transform for chroma blocks in H264
1000 *
1001 * @param[in] pu1_src
1002 *  UWORD8 pointer to the source
1003 *
1004 * @param[out] pu1_dst
1005 *  UWORD8 pointer to the destination
1006 *
1007 * @param[in] src_strd
1008 *  integer source stride
1009 *
1010 * @param[in] dst_strd
1011 *  integer destination stride
1012 *
1013 * @param[in] pu2_thrsh
1014 *  Threshold for each element of transofrmed quantized block
1015 *
1016 * @param[out] pi4_mb_distortion
1017 *  integer evaluated sad
1018 *
1019 * @remarks
1020 * Fucntion code is nit updated.
1021 * Will require debugging and minor modifications
1022 *
1023 ******************************************************************************
1024 */
ime_compute_satqd_8x16_chroma(UWORD8 * pu1_src,UWORD8 * pu1_est,WORD32 src_strd,WORD32 est_strd,WORD32 max_sad,UWORD16 * thrsh)1025 void ime_compute_satqd_8x16_chroma(UWORD8 *pu1_src,
1026                                      UWORD8 *pu1_est,
1027                                      WORD32 src_strd,
1028                                      WORD32 est_strd,
1029                                      WORD32 max_sad,
1030                                      UWORD16 *thrsh)
1031 {
1032     WORD32 i,j,plane;
1033     WORD16 s1,s2,s3,s4,sad_1,sad_2,ls1,ls2,ls3,ls4,ls5,ls6,ls7,ls8;
1034     UWORD8 *pu1_src_lp,*pu1_est_lp,*pu1_src_plane,*pu1_est_plane;
1035     WORD32 sad =0;
1036     UNUSED(max_sad);
1037 
1038     pu1_src_plane = pu1_src;
1039     pu1_est_plane = pu1_est;
1040 
1041     for(plane =0;plane<2;plane++)
1042     {
1043         for(i=0;i<4;i++)
1044         {
1045             for(j=0;j<4;j++)
1046             {
1047                 pu1_src_lp = pu1_src + 8*j;
1048                 pu1_est_lp = pu1_est + 8*j;
1049 
1050                 s1 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
1051                 s4 = ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2])+ ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
1052 
1053                 pu1_src_lp += src_strd;
1054                 pu1_est_lp += est_strd;
1055 
1056                 s2 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
1057                 s3 = ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2])+ ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
1058 
1059                 pu1_src_lp += src_strd;
1060                 pu1_est_lp += est_strd;
1061 
1062                 s2 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
1063                 s3 += ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2])+ ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
1064 
1065                 pu1_src_lp += src_strd;
1066                 pu1_est_lp += est_strd;
1067 
1068                 s1 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
1069                 s4 += ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2])+ ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
1070 
1071                 sad_1 = s1+s2+s3+s4;
1072                 sad_2 = sad_1<<1;
1073 
1074                 ls1 = sad_2 -(s2 + s3);
1075                 ls2 = sad_2 -(s1 + s4);
1076                 ls3 = sad_2 -(s3 + s4);
1077                 ls4 = sad_2 -(s3 - (s1<<1));
1078                 ls5 = sad_2 -(s4 - (s2<<1));
1079                 ls6 = sad_2 -(s1 + s2);
1080                 ls7 = sad_2 -(s2 - (s4<<1));
1081                 ls8 = sad_2 -(s1 - (s3<<1));
1082 
1083                 if(
1084                         //thrsh[0] >  sad_1     && Chroma Dc is checked later
1085                         thrsh[1] >  ls1     &&
1086                         thrsh[2] >  sad_1   &&
1087                         thrsh[3] >  ls2     &&
1088 
1089                         thrsh[4] >  ls3     &&
1090                         thrsh[5] >  ls4     &&
1091                         thrsh[6] >  ls3     &&
1092                         thrsh[7] >  ls5     &&
1093 
1094                         thrsh[8] >  sad_1   &&
1095                         thrsh[9] >  ls1     &&
1096                         thrsh[10]>  sad_1   &&
1097                         thrsh[11]>  ls2     &&
1098 
1099                         thrsh[12]>  ls6     &&
1100                         thrsh[13]>  ls7     &&
1101                         thrsh[14]>  ls6     &&
1102                         thrsh[15]>  ls8
1103                 )
1104                 {
1105                     /*set current sad to be zero*/
1106                 }
1107                 else
1108                     return ;
1109 
1110                 sad += sad_1;
1111             }
1112             pu1_src +=  (src_strd *4);
1113             pu1_est +=  (est_strd *4);
1114         }
1115         if(sad < (thrsh[0]<<1))sad = 0;
1116         else return ;
1117 
1118         pu1_src = pu1_src_plane+1;
1119         pu1_est = pu1_est_plane+1;
1120     }
1121     return ;
1122 }
1123 
1124 
1125 /**
1126 ******************************************************************************
1127 *
1128 * @brief computes distortion (SAD and SAQTD) between 2 16x16 blocks
1129 *
1130 * @par   Description
1131 *   This functions computes SAD between 2 16x16 blocks.
1132 *   It also checks if the SATDD(Sum of absolute transformed wuqntized differnce beteern the blocks
1133 *   If SAQTD is zero, it gives back zero
1134 *   Other wise sad is retrned
1135 *   There is no provison for early exit
1136 *
1137 *   The transform done here is the transform for inter 16x16 blocks in H264
1138 *
1139 * @param[in] pu1_src
1140 *  UWORD8 pointer to the source
1141 *
1142 * @param[out] pu1_dst
1143 *  UWORD8 pointer to the destination
1144 *
1145 * @param[in] src_strd
1146 *  integer source stride
1147 *
1148 * @param[in] dst_strd
1149 *  integer destination stride
1150 *
1151 * @param[in] pu2_thrsh
1152 *  Threshold for each element of transofrmed quantized block
1153 *
1154 * @param[out] pi4_mb_distortion
1155 *  integer evaluated sad
1156 *
1157 * @remarks
1158 *
1159 ******************************************************************************
1160 */
ime_compute_satqd_16x16_lumaintra(UWORD8 * pu1_src,UWORD8 * pu1_est,WORD32 src_strd,WORD32 est_strd,WORD32 max_sad,UWORD16 * thrsh,WORD32 * pi4_mb_distortion,UWORD8 * sig_nz_sad)1161 void ime_compute_satqd_16x16_lumaintra(UWORD8 *pu1_src,
1162                                          UWORD8 *pu1_est,
1163                                          WORD32 src_strd,
1164                                          WORD32 est_strd,
1165                                          WORD32 max_sad,
1166                                          UWORD16 *thrsh,
1167                                          WORD32 *pi4_mb_distortion,
1168                                          UWORD8 *sig_nz_sad)
1169 {
1170     UWORD32 i,j;
1171     WORD16 s1[4],s2[4],s3[4],s4[4],sad[4];
1172     UWORD8 *pu1_src_lp,*pu1_est_lp;
1173     UWORD8 *sig_sad_dc;
1174     UWORD32 nz_sad_sig = 0;
1175     UNUSED(max_sad);
1176     *pi4_mb_distortion =0;
1177 
1178     sig_sad_dc = sig_nz_sad;
1179     sig_nz_sad++;
1180 
1181     for(i=0;i<4;i++)
1182     {
1183         for(j=0;j<4;j++)
1184         {
1185             pu1_src_lp = pu1_src + 4*j;
1186             pu1_est_lp = pu1_est + 4*j;
1187 
1188             s1[j] = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
1189             s4[j] = ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
1190 
1191             pu1_src_lp += src_strd;
1192             pu1_est_lp += est_strd;
1193 
1194             s2[j] = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
1195             s3[j] = ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
1196 
1197             pu1_src_lp += src_strd;
1198             pu1_est_lp += est_strd;
1199 
1200             s2[j] += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
1201             s3[j] += ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
1202 
1203             pu1_src_lp += src_strd;
1204             pu1_est_lp += est_strd;
1205 
1206             s1[j] += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
1207             s4[j] += ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
1208 
1209             sad[j] = ((s1[j]+s2[j]+s3[j]+s4[j])<<1);
1210         }
1211 
1212         for(j=0;j<4;j++)
1213         {
1214 
1215             if(
1216                     //thrsh[0] > (sad[j] >> 1) &&Dc goes in the other part
1217                     thrsh[1] > (sad[j] -(s2[j] + s3[j])) &&
1218                     thrsh[2] > (sad[j]>>1) &&
1219                     thrsh[3] > (sad[j] -(s1[j] + s4[j])) &&
1220 
1221                     thrsh[4] > (sad[j] -(s3[j] + s4[j])) &&
1222                     thrsh[5] > (sad[j] -(s3[j] - (s1[j]<<1))) &&
1223                     thrsh[6] > (sad[j] -(s3[j] + s4[j])) &&
1224                     thrsh[7] > (sad[j] -(s4[j] - (s2[j]<<1))) &&
1225 
1226                     thrsh[8] > (sad[j]>>1) &&
1227                     thrsh[9] > (sad[j] -(s2[j] + s3[j])) &&
1228                     thrsh[10]> (sad[j]>>1) &&
1229                     thrsh[11]> (sad[j] -(s1[j] + s4[j])) &&
1230 
1231                     thrsh[12]> (sad[j] -(s1[j] + s2[j])) &&
1232                     thrsh[13]> (sad[j] -(s2[j] - (s4[j]<<1))) &&
1233                     thrsh[14]> (sad[j] -(s1[j] + s2[j])) &&
1234                     thrsh[15]> (sad[j] -(s1[j] - (s3[j]<<1)))
1235             )
1236             {
1237                 //sad[j] = 0;   /*set current sad to be zero*/
1238                 sig_nz_sad[j] = 0;/*Signal that the sad is zero*/
1239             }
1240             else
1241             {
1242                 sig_nz_sad[j] = 1;/*signal that sad is non zero*/
1243                 nz_sad_sig = 1;
1244             }
1245 
1246             (*pi4_mb_distortion) += (sad[j]>>1);
1247             //if((*pi4_mb_distortion) >= max_sad)return; /*return or some thing*/
1248         }
1249 
1250         sig_nz_sad += 4;
1251         pu1_src +=  (src_strd *4);
1252         pu1_est +=  (est_strd *4);
1253     }
1254 
1255     if((*pi4_mb_distortion) < thrsh[0]<<2)
1256     {
1257         *sig_sad_dc = 0;
1258         if(nz_sad_sig == 0)(*pi4_mb_distortion) = 0;
1259     }
1260     else *sig_sad_dc = 1;
1261 }
1262 
1263 
1264