• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 
12 #ifndef WIN32
13 # include <unistd.h>
14 #endif
15 #ifdef __APPLE__
16 #include <mach/mach_init.h>
17 #endif
18 #include "onyxd_int.h"
19 #include "vpx_mem/vpx_mem.h"
20 #include "threading.h"
21 
22 #include "loopfilter.h"
23 #include "extend.h"
24 #include "vpx_ports/vpx_timer.h"
25 #include "detokenize.h"
26 #include "reconinter.h"
27 #include "reconintra_mt.h"
28 
/* Helpers defined in other translation units; prototyped here directly
 * instead of through a shared header. */
29 extern void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd);
30 extern void clamp_mvs(MACROBLOCKD *xd);
31 extern void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel);
32 
/* RTCD_VTABLE(x): with runtime CPU detection enabled this expands to the
 * address of member `x` of pbi->common.rtcd, otherwise to NULL.
 * NOTE: the expansion names `pbi` literally, so the macro can only be used
 * where a variable called `pbi` is in scope. */
33 #if CONFIG_RUNTIME_CPU_DETECT
34 #define RTCD_VTABLE(x) (&(pbi)->common.rtcd.x)
35 #else
36 #define RTCD_VTABLE(x) NULL
37 #endif
38 
/*
 * Initialise the per-thread MACROBLOCKD of the first `count` entries of
 * `mbrd` for the frame about to be decoded, then reset every entry of
 * pbi->mt_current_mb_col to -1.
 *
 *   pbi   - decoder instance; frame-level state is read from pbi->common.
 *   xd    - template MACROBLOCKD: its prediction function pointers,
 *           segmentation data, loop-filter deltas and per-block dequant
 *           values are copied into every worker.
 *   mbrd  - array holding at least `count` per-thread row decoders.
 *   count - number of entries of `mbrd` to set up.
 *
 * Does nothing when CONFIG_MULTITHREAD is disabled.
 */
void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
{
#if CONFIG_MULTITHREAD
    VP8_COMMON *const cm = &pbi->common;
    int t;
    int row;

    for (t = 0; t < count; ++t)
    {
        MACROBLOCKD *const worker = &mbrd[t].mbd;
        int blk;

#if CONFIG_RUNTIME_CPU_DETECT
        worker->rtcd = xd->rtcd;
#endif
        /* Sub-pixel prediction functions chosen for this frame. */
        worker->subpixel_predict      = xd->subpixel_predict;
        worker->subpixel_predict8x4   = xd->subpixel_predict8x4;
        worker->subpixel_predict8x8   = xd->subpixel_predict8x8;
        worker->subpixel_predict16x16 = xd->subpixel_predict16x16;

        /* Worker t starts on mode-info row t + 1. */
        worker->mode_info_context = cm->mi + cm->mode_info_stride * (t + 1);
        worker->mode_info_stride  = cm->mode_info_stride;

        worker->frame_type                = cm->frame_type;
        worker->frames_since_golden       = cm->frames_since_golden;
        worker->frames_till_alt_ref_frame = cm->frames_till_alt_ref_frame;

        /* Reference (last) and destination (new) frame buffers. */
        worker->pre = cm->yv12_fb[cm->lst_fb_idx];
        worker->dst = cm->yv12_fb[cm->new_fb_idx];

        vp8_setup_block_dptrs(worker);
        vp8_build_block_doffsets(worker);

        /* Segmentation state. */
        worker->segmentation_enabled  = xd->segmentation_enabled;
        worker->mb_segement_abs_delta = xd->mb_segement_abs_delta;
        vpx_memcpy(worker->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));

        /* Loop-filter delta tables and their enable/update flags. */
        vpx_memcpy(worker->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas));
        vpx_memcpy(worker->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas));
        worker->mode_ref_lf_delta_enabled = xd->mode_ref_lf_delta_enabled;
        worker->mode_ref_lf_delta_update  = xd->mode_ref_lf_delta_update;

        worker->current_bc = &pbi->bc2;

        /* Copy the dequant value of each of the 25 blocks. */
        for (blk = 0; blk < 25; ++blk)
            worker->block[blk].dequant = xd->block[blk].dequant;
    }

    /* -1: no column of this row has been decoded yet. */
    for (row = 0; row < cm->mb_rows; ++row)
        pbi->mt_current_mb_col[row] = -1;
#else
    (void) pbi;
    (void) xd;
    (void) mbrd;
    (void) count;
#endif
}
98 
99 
/*
 * Decode and reconstruct one macroblock on the multithreaded path.
 *
 * Steps, in order:
 *   1. read the MB's tokens (or just reset the token contexts when the MB
 *      is flagged as skipped);
 *   2. clamp motion vectors when the mode info asks for it;
 *   3. if the MB has no coefficients and is neither B_PRED nor SPLITMV,
 *      build the prediction with the "_s" predictor variants and return;
 *   4. otherwise build the intra/inter prediction, then dequantize,
 *      inverse-transform and add the residual for luma and chroma.
 *
 *   pbi            - decoder instance (dequant/IDCT function tables).
 *   xd             - macroblock descriptor of the calling thread.
 *   mb_row, mb_col - position of the macroblock, forwarded to the
 *                    multithreaded intra predictors.
 *
 * Does nothing when CONFIG_MULTITHREAD is disabled.
 */
void vp8mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col)
{
#if CONFIG_MULTITHREAD
    int eobtotal = 0;
    int i, do_clamp = xd->mode_info_context->mbmi.need_to_clamp_mvs;

    if (xd->mode_info_context->mbmi.mb_skip_coeff)
    {
        vp8_reset_mb_tokens_context(xd);
    }
    else
    {
        eobtotal = vp8_decode_mb_tokens(pbi, xd);
    }

    /* Perform temporary clamping of the MV to be used for prediction */
    if (do_clamp)
    {
        clamp_mvs(xd);
    }

    xd->mode_info_context->mbmi.dc_diff = 1;

    /* No residual at all: prediction only, then done. */
    if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV && eobtotal == 0)
    {
        xd->mode_info_context->mbmi.dc_diff = 0;

        if (xd->frame_type == KEY_FRAME  ||  xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
        {
            vp8mt_build_intra_predictors_mbuv_s(pbi, xd, mb_row, mb_col);
            vp8mt_build_intra_predictors_mby_s(pbi, xd, mb_row, mb_col);
        }
        else
        {
            vp8_build_inter_predictors_mb_s(xd);
        }
        return;
    }

    if (xd->segmentation_enabled)
        mb_init_dequantizer(pbi, xd);

    /* do prediction */
    if (xd->frame_type == KEY_FRAME  ||  xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
    {
        vp8mt_build_intra_predictors_mbuv(pbi, xd, mb_row, mb_col);

        if (xd->mode_info_context->mbmi.mode != B_PRED)
        {
            vp8mt_build_intra_predictors_mby(pbi, xd, mb_row, mb_col);
        }
        else
        {
            vp8mt_intra_prediction_down_copy(pbi, xd, mb_row, mb_col);
        }
    }
    else
    {
        vp8_build_inter_predictors_mb(xd);
    }

    /* dequantization and idct */
    if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
    {
        /* Block 24 is handled first and its output (b->diff) feeds the
         * luma reconstruction below. */
        BLOCKD *b = &xd->block[24];
        DEQUANT_INVOKE(&pbi->dequant, block)(b);

        /* do 2nd order transform on the dc block */
        if (xd->eobs[24] > 1)
        {
            IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], b->diff);
            /* Clear the consumed coefficients, one int-sized store at a time. */
            ((int *)b->qcoeff)[0] = 0;
            ((int *)b->qcoeff)[1] = 0;
            ((int *)b->qcoeff)[2] = 0;
            ((int *)b->qcoeff)[3] = 0;
            ((int *)b->qcoeff)[4] = 0;
            ((int *)b->qcoeff)[5] = 0;
            ((int *)b->qcoeff)[6] = 0;
            ((int *)b->qcoeff)[7] = 0;
        }
        else
        {
            IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0], b->diff);
            ((int *)b->qcoeff)[0] = 0;
        }

        DEQUANT_INVOKE (&pbi->dequant, dc_idct_add_y_block)
                        (xd->qcoeff, xd->block[0].dequant,
                         xd->predictor, xd->dst.y_buffer,
                         xd->dst.y_stride, xd->eobs, xd->block[24].diff);
    }
    else if ((xd->frame_type == KEY_FRAME  ||  xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) && xd->mode_info_context->mbmi.mode == B_PRED)
    {
        /* B_PRED: predict and reconstruct each of the 16 luma blocks in
         * turn. */
        for (i = 0; i < 16; i++)
        {
            BLOCKD *b = &xd->block[i];
            vp8mt_predict_intra4x4(pbi, xd, b->bmi.mode, b->predictor, mb_row, mb_col, i);

            if (xd->eobs[i] > 1)
            {
                DEQUANT_INVOKE(&pbi->dequant, idct_add)
                    (b->qcoeff, b->dequant,  b->predictor,
                    *(b->base_dst) + b->dst, 16, b->dst_stride);
            }
            else
            {
                /* At most one coefficient: scalar path using the DC only. */
                IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
                    (b->qcoeff[0] * b->dequant[0], b->predictor,
                    *(b->base_dst) + b->dst, 16, b->dst_stride);
                ((int *)b->qcoeff)[0] = 0;
            }
        }
    }
    else
    {
        DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block)
                        (xd->qcoeff, xd->block[0].dequant,
                         xd->predictor, xd->dst.y_buffer,
                         xd->dst.y_stride, xd->eobs);
    }

    /* Chroma: coefficients, predictor and eobs start after the 16 luma
     * blocks. */
    DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block)
                    (xd->qcoeff+16*16, xd->block[16].dequant,
                     xd->predictor+16*16, xd->dst.u_buffer, xd->dst.v_buffer,
                     xd->dst.uv_stride, xd->eobs+16);
#else
    (void) pbi;
    (void) xd;
    (void) mb_row;
    (void) mb_col;
#endif
}
232 
233 
vp8_thread_decoding_proc(void * p_data)234 THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
235 {
236 #if CONFIG_MULTITHREAD
237     int ithread = ((DECODETHREAD_DATA *)p_data)->ithread;
238     VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1);
239     MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2);
240     ENTROPY_CONTEXT_PLANES mb_row_left_context;
241 
242     while (1)
243     {
244         if (pbi->b_multithreaded_rd == 0)
245             break;
246 
247         /*if(WaitForSingleObject(pbi->h_event_start_decoding[ithread], INFINITE) == WAIT_OBJECT_0)*/
248         if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0)
249         {
250             if (pbi->b_multithreaded_rd == 0)
251                 break;
252             else
253             {
254                 VP8_COMMON *pc = &pbi->common;
255                 MACROBLOCKD *xd = &mbrd->mbd;
256 
257                 int mb_row;
258                 int num_part = 1 << pbi->common.multi_token_partition;
259                 volatile int *last_row_current_mb_col;
260                 int nsync = pbi->sync_range;
261 
262                 for (mb_row = ithread+1; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
263                 {
264                     int i;
265                     int recon_yoffset, recon_uvoffset;
266                     int mb_col;
267                     int ref_fb_idx = pc->lst_fb_idx;
268                     int dst_fb_idx = pc->new_fb_idx;
269                     int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
270                     int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
271 
272                     int filter_level;
273                     loop_filter_info *lfi = pc->lf_info;
274                     int alt_flt_enabled = xd->segmentation_enabled;
275                     int Segment;
276 
277                     pbi->mb_row_di[ithread].mb_row = mb_row;
278                     pbi->mb_row_di[ithread].mbd.current_bc =  &pbi->mbc[mb_row%num_part];
279 
280                     last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row -1];
281 
282                     recon_yoffset = mb_row * recon_y_stride * 16;
283                     recon_uvoffset = mb_row * recon_uv_stride * 8;
284                     /* reset above block coeffs */
285 
286                     xd->above_context = pc->above_context;
287                     xd->left_context = &mb_row_left_context;
288                     vpx_memset(&mb_row_left_context, 0, sizeof(mb_row_left_context));
289                     xd->up_available = (mb_row != 0);
290 
291                     xd->mb_to_top_edge = -((mb_row * 16)) << 3;
292                     xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
293 
294                     for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
295                     {
296                         if ((mb_col & (nsync-1)) == 0)
297                         {
298                             while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1)
299                             {
300                                 x86_pause_hint();
301                                 thread_sleep(0);
302                             }
303                         }
304 
305                         if (xd->mode_info_context->mbmi.mode == SPLITMV || xd->mode_info_context->mbmi.mode == B_PRED)
306                         {
307                             for (i = 0; i < 16; i++)
308                             {
309                                 BLOCKD *d = &xd->block[i];
310                                 vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO));
311                             }
312                         }
313 
314                         if(pbi->common.filter_level)
315                         {
316                             /*update loopfilter info*/
317                             Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
318                             filter_level = pbi->mt_baseline_filter_level[Segment];
319                             /* Distance of Mb to the various image edges.
320                              * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
321                              * Apply any context driven MB level adjustment
322                              */
323                             vp8_adjust_mb_lf_value(xd, &filter_level);
324                         }
325 
326                         /* Distance of Mb to the various image edges.
327                          * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
328                          */
329                         xd->mb_to_left_edge = -((mb_col * 16) << 3);
330                         xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
331 
332                         xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
333                         xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
334                         xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
335 
336                         xd->left_available = (mb_col != 0);
337 
338                         /* Select the appropriate reference frame for this MB */
339                         if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
340                             ref_fb_idx = pc->lst_fb_idx;
341                         else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
342                             ref_fb_idx = pc->gld_fb_idx;
343                         else
344                             ref_fb_idx = pc->alt_fb_idx;
345 
346                         xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
347                         xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
348                         xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
349 
350                         vp8_build_uvmvs(xd, pc->full_pixel);
351                         vp8mt_decode_macroblock(pbi, xd, mb_row, mb_col);
352 
353                         if (pbi->common.filter_level)
354                         {
355                             if( mb_row != pc->mb_rows-1 )
356                             {
357                                 /* Save decoded MB last row data for next-row decoding */
358                                 vpx_memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
359                                 vpx_memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
360                                 vpx_memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
361                             }
362 
363                             /* save left_col for next MB decoding */
364                             if(mb_col != pc->mb_cols-1)
365                             {
366                                 MODE_INFO *next = xd->mode_info_context +1;
367 
368                                 if (xd->frame_type == KEY_FRAME  ||  next->mbmi.ref_frame == INTRA_FRAME)
369                                 {
370                                     for (i = 0; i < 16; i++)
371                                         pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer [i* recon_y_stride + 15];
372                                     for (i = 0; i < 8; i++)
373                                     {
374                                         pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer [i* recon_uv_stride + 7];
375                                         pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer [i* recon_uv_stride + 7];
376                                     }
377                                 }
378                             }
379 
380                           /* loopfilter on this macroblock. */
381                             if (filter_level)
382                             {
383                                 if (mb_col > 0)
384                                     pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
385 
386                                 if (xd->mode_info_context->mbmi.dc_diff > 0)
387                                     pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
388 
389                                 /* don't apply across umv border */
390                                 if (mb_row > 0)
391                                     pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
392 
393                                 if (xd->mode_info_context->mbmi.dc_diff > 0)
394                                     pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
395                             }
396                         }
397 
398                         recon_yoffset += 16;
399                         recon_uvoffset += 8;
400 
401                         ++xd->mode_info_context;  /* next mb */
402 
403                         xd->above_context++;
404 
405                         /*pbi->mb_row_di[ithread].current_mb_col = mb_col;*/
406                         pbi->mt_current_mb_col[mb_row] = mb_col;
407                     }
408 
409                     /* adjust to the next row of mbs */
410                     if (pbi->common.filter_level)
411                     {
412                         if(mb_row != pc->mb_rows-1)
413                         {
414                             int lasty = pc->yv12_fb[ref_fb_idx].y_width + VP8BORDERINPIXELS;
415                             int lastuv = (pc->yv12_fb[ref_fb_idx].y_width>>1) + (VP8BORDERINPIXELS>>1);
416 
417                             for (i = 0; i < 4; i++)
418                             {
419                                 pbi->mt_yabove_row[mb_row +1][lasty + i] = pbi->mt_yabove_row[mb_row +1][lasty -1];
420                                 pbi->mt_uabove_row[mb_row +1][lastuv + i] = pbi->mt_uabove_row[mb_row +1][lastuv -1];
421                                 pbi->mt_vabove_row[mb_row +1][lastuv + i] = pbi->mt_vabove_row[mb_row +1][lastuv -1];
422                             }
423                         }
424                     } else
425                         vp8_extend_mb_row(&pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
426 
427                     ++xd->mode_info_context;      /* skip prediction column */
428 
429                     /* since we have multithread */
430                     xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
431                 }
432             }
433         }
434         /*  add this to each frame */
435         if ((mbrd->mb_row == pbi->common.mb_rows-1) || ((mbrd->mb_row == pbi->common.mb_rows-2) && (pbi->common.mb_rows % (pbi->decoding_thread_count+1))==1))
436         {
437             /*SetEvent(pbi->h_event_end_decoding);*/
438             sem_post(&pbi->h_event_end_decoding);
439         }
440     }
441 #else
442     (void) p_data;
443 #endif
444 
445     return 0 ;
446 }
447 
448 
/*
 * Create the decoder's worker threads and their synchronisation objects.
 *
 * The number of cores used is pbi->max_threads capped at 16. With more than
 * one core, core_count - 1 workers are started (the calling thread is the
 * remaining decoder); otherwise multithreaded decoding stays disabled and
 * nothing is allocated.
 *
 * On return pbi->allocated_decoding_thread_count holds the number of
 * workers created (0 when disabled).
 *
 * Does nothing when CONFIG_MULTITHREAD is disabled.
 */
void vp8_decoder_create_threads(VP8D_COMP *pbi)
{
#if CONFIG_MULTITHREAD
    int core_count;
    int ithread;

    pbi->b_multithreaded_rd = 0;
    pbi->allocated_decoding_thread_count = 0;
    core_count = (pbi->max_threads > 16) ? 16 : pbi->max_threads;

    if (core_count > 1)
    {
        /* Must be set before the threads start: each worker checks this
         * flag and exits its loop when it is 0. */
        pbi->b_multithreaded_rd = 1;
        pbi->decoding_thread_count = core_count -1;

        CHECK_MEM_ERROR(pbi->h_decoding_thread, vpx_malloc(sizeof(pthread_t) * pbi->decoding_thread_count));
        CHECK_MEM_ERROR(pbi->h_event_start_decoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count));
        CHECK_MEM_ERROR(pbi->mb_row_di, vpx_memalign(32, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count));
        vpx_memset(pbi->mb_row_di, 0, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count);
        CHECK_MEM_ERROR(pbi->de_thread_data, vpx_malloc(sizeof(DECODETHREAD_DATA) * pbi->decoding_thread_count));

        for (ithread = 0; ithread < pbi->decoding_thread_count; ithread++)
        {
            /* Initial count 0: the worker blocks until a frame is posted. */
            sem_init(&pbi->h_event_start_decoding[ithread], 0, 0);

            pbi->de_thread_data[ithread].ithread  = ithread;
            pbi->de_thread_data[ithread].ptr1     = (void *)pbi;
            pbi->de_thread_data[ithread].ptr2     = (void *) &pbi->mb_row_di[ithread];

            pthread_create(&pbi->h_decoding_thread[ithread], NULL, vp8_thread_decoding_proc, (&pbi->de_thread_data[ithread]));
        }

        sem_init(&pbi->h_event_end_decoding, 0, 0);

        pbi->allocated_decoding_thread_count = pbi->decoding_thread_count;
    }

#else
    (void) pbi;
#endif
}
491 
492 
#if CONFIG_MULTITHREAD
/* Free every non-NULL row of *rows_ptr (mb_rows entries), then the row
 * array itself, and set *rows_ptr to NULL. No-op when *rows_ptr is NULL. */
static void mt_free_row_buffers(unsigned char ***rows_ptr, int mb_rows)
{
    unsigned char **rows = *rows_ptr;
    int i;

    if (!rows)
        return;

    for (i = 0; i < mb_rows; i++)
    {
        if (rows[i])
        {
            vpx_free(rows[i]);
            rows[i] = NULL;
        }
    }

    vpx_free(rows);
    *rows_ptr = NULL;
}
#endif

/*
 * Release the temporary buffers used by multithreaded decoding: the
 * per-row progress array and the above-row / left-column buffers for the
 * Y, U and V planes. Every released pointer is reset to NULL.
 *
 *   pbi     - decoder instance owning the buffers.
 *   mb_rows - number of rows each per-row buffer array was allocated with.
 *
 * Only acts when pbi->b_multithreaded_rd is set; does nothing when
 * CONFIG_MULTITHREAD is disabled.
 */
void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows)
{
#if CONFIG_MULTITHREAD
    if (pbi->b_multithreaded_rd)
    {
        if (pbi->mt_current_mb_col)
        {
            vpx_free(pbi->mt_current_mb_col);
            pbi->mt_current_mb_col = NULL ;
        }

        /* Free above_row buffers. */
        mt_free_row_buffers(&pbi->mt_yabove_row, mb_rows);
        mt_free_row_buffers(&pbi->mt_uabove_row, mb_rows);
        mt_free_row_buffers(&pbi->mt_vabove_row, mb_rows);

        /* Free left_col buffers. */
        mt_free_row_buffers(&pbi->mt_yleft_col, mb_rows);
        mt_free_row_buffers(&pbi->mt_uleft_col, mb_rows);
        mt_free_row_buffers(&pbi->mt_vleft_col, mb_rows);
    }
#else
    (void) pbi;
    (void) mb_rows;
#endif
}
597 
598 
/*
 * (Re)allocate the temporary buffers used by multithreaded decoding and
 * choose the row synchronisation range from the frame width.
 *
 *   pbi          - decoder instance; pbi->common.mb_rows gives the number
 *                  of rows to allocate for.
 *   width        - frame width in pixels; rounded up to a multiple of 16.
 *   prev_mb_rows - row count the existing buffers were allocated with,
 *                  forwarded to vp8mt_de_alloc_temp_buffers().
 *
 * Returns 0. Allocation failures are reported through CHECK_MEM_ERROR.
 * Only acts when pbi->b_multithreaded_rd is set; with CONFIG_MULTITHREAD
 * disabled the function just returns 0.
 */
int vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
{
#if CONFIG_MULTITHREAD
    VP8_COMMON *const pc = & pbi->common;
    int i;
    int uv_width;

    if (pbi->b_multithreaded_rd)
    {
        vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows);

        /* our internal buffers are always multiples of 16 */
        if ((width & 0xf) != 0)
            width += 16 - (width & 0xf);

        /* Wider frames synchronise with the row above less often. */
        if (width < 640) pbi->sync_range = 1;
        else if (width <= 1280) pbi->sync_range = 8;
        else if (width <= 2560) pbi->sync_range =16;
        else pbi->sync_range = 32;

        uv_width = width >>1;

        /* Allocate an int for each mb row. */
        CHECK_MEM_ERROR(pbi->mt_current_mb_col, vpx_malloc(sizeof(int) * pc->mb_rows));

        /* The row-pointer arrays are zero-initialised so that, if a later
         * per-row allocation fails, the remaining entries are NULL and the
         * per-row NULL checks in vp8mt_de_alloc_temp_buffers() stay valid. */

        /* Allocate memory for above_row buffers. */
        CHECK_MEM_ERROR(pbi->mt_yabove_row, vpx_calloc(pc->mb_rows, sizeof(unsigned char *)));
        for (i=0; i< pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_yabove_row[i], vpx_calloc(sizeof(unsigned char) * (width + (VP8BORDERINPIXELS<<1)), 1));

        CHECK_MEM_ERROR(pbi->mt_uabove_row, vpx_calloc(pc->mb_rows, sizeof(unsigned char *)));
        for (i=0; i< pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_uabove_row[i], vpx_calloc(sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS), 1));

        CHECK_MEM_ERROR(pbi->mt_vabove_row, vpx_calloc(pc->mb_rows, sizeof(unsigned char *)));
        for (i=0; i< pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_vabove_row[i], vpx_calloc(sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS), 1));

        /* Allocate memory for left_col buffers. */
        CHECK_MEM_ERROR(pbi->mt_yleft_col, vpx_calloc(pc->mb_rows, sizeof(unsigned char *)));
        for (i=0; i< pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_yleft_col[i], vpx_calloc(sizeof(unsigned char) * 16, 1));

        CHECK_MEM_ERROR(pbi->mt_uleft_col, vpx_calloc(pc->mb_rows, sizeof(unsigned char *)));
        for (i=0; i< pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_uleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));

        CHECK_MEM_ERROR(pbi->mt_vleft_col, vpx_calloc(pc->mb_rows, sizeof(unsigned char *)));
        for (i=0; i< pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_vleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));
    }
#else
    (void) pbi;
    (void) width;
    (void) prev_mb_rows;
#endif
    /* Returned on every configuration: falling off the end of a non-void
     * function is undefined if the caller uses the result. */
    return 0;
}
656 
657 
/*
 * Stop the decoder's worker threads and release everything
 * vp8_decoder_create_threads() set up.
 *
 * Clears pbi->b_multithreaded_rd, wakes and joins each worker in turn,
 * destroys the semaphores, then frees the thread handle, semaphore,
 * row-decoder and thread-data arrays (resetting each pointer to NULL).
 *
 * No-op when multithreaded decoding is not active or when
 * CONFIG_MULTITHREAD is disabled.
 */
void vp8_decoder_remove_threads(VP8D_COMP *pbi)
{
#if CONFIG_MULTITHREAD
    int t;

    /* Nothing to shut down. */
    if (!pbi->b_multithreaded_rd)
        return;

    /* Workers exit their loop once they see this flag cleared. */
    pbi->b_multithreaded_rd = 0;

    /* Wake each worker so it notices the cleared flag, then wait for it. */
    for (t = 0; t < pbi->allocated_decoding_thread_count; ++t)
    {
        sem_post(&pbi->h_event_start_decoding[t]);
        pthread_join(pbi->h_decoding_thread[t], NULL);
    }

    /* All workers have been joined; the semaphores can go. */
    for (t = 0; t < pbi->allocated_decoding_thread_count; ++t)
        sem_destroy(&pbi->h_event_start_decoding[t]);

    sem_destroy(&pbi->h_event_end_decoding);

    if (pbi->h_decoding_thread != NULL)
    {
        vpx_free(pbi->h_decoding_thread);
        pbi->h_decoding_thread = NULL;
    }

    if (pbi->h_event_start_decoding != NULL)
    {
        vpx_free(pbi->h_event_start_decoding);
        pbi->h_event_start_decoding = NULL;
    }

    if (pbi->mb_row_di != NULL)
    {
        vpx_free(pbi->mb_row_di);
        pbi->mb_row_di = NULL;
    }

    if (pbi->de_thread_data != NULL)
    {
        vpx_free(pbi->de_thread_data);
        pbi->de_thread_data = NULL;
    }
#else
    (void) pbi;
#endif
}
711 
712 
/*
 * Per-frame loop-filter setup for multithreaded decoding.
 *
 * Fills pbi->mt_baseline_filter_level[] with one baseline level per
 * segment, then (re)initialises the loop filter tables when the filter
 * type or sharpness changed, or only the frame-type dependent part when
 * just the frame type changed.
 *
 *   pbi              - decoder instance; segmentation settings are read
 *                      from pbi->mb.
 *   default_filt_lvl - frame-level filter level, used directly when
 *                      segmentation is off and as the base for delta
 *                      values when it is on.
 *
 * Does nothing when CONFIG_MULTITHREAD is disabled.
 */
void vp8mt_lpf_init( VP8D_COMP *pbi, int default_filt_lvl)
{
#if CONFIG_MULTITHREAD
    VP8_COMMON *cm  = &pbi->common;
    MACROBLOCKD *mbd = &pbi->mb;
    loop_filter_info *lfi = cm->lf_info;
    FRAME_TYPE frame_type = cm->frame_type;
    int alt_flt_enabled = mbd->segmentation_enabled;
    int i;

    /* Note the baseline filter values for each segment */
    if (alt_flt_enabled)
    {
        for (i = 0; i < MAX_MB_SEGMENTS; i++)
        {
            if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
            {
                /* Abs value: the segment data is the level itself. */
                pbi->mt_baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
            }
            else
            {
                /* Delta value: offset from the frame level ... */
                pbi->mt_baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];

                /* ... clamped to the valid range [0, MAX_LOOP_FILTER]. */
                if (pbi->mt_baseline_filter_level[i] < 0)
                    pbi->mt_baseline_filter_level[i] = 0;
                else if (pbi->mt_baseline_filter_level[i] > MAX_LOOP_FILTER)
                    pbi->mt_baseline_filter_level[i] = MAX_LOOP_FILTER;
            }
        }
    }
    else
    {
        for (i = 0; i < MAX_MB_SEGMENTS; i++)
            pbi->mt_baseline_filter_level[i] = default_filt_lvl;
    }

    /* Initialize the loop filter for this frame. */
    if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
        vp8_init_loop_filter(cm);
    else if (frame_type != cm->last_frame_type)
        vp8_frame_init_loop_filter(lfi, frame_type);
#else
    (void) pbi;
    (void) default_filt_lvl;
#endif
}
763 
764 
/* Decode the macroblock rows of the current frame that are handled by the
 * calling thread, with the loop filter applied in-line per macroblock.
 *
 * The calling thread processes rows 0, N+1, 2*(N+1), ... where
 * N == pbi->decoding_thread_count. Before starting it posts
 * h_event_start_decoding[] once per worker, and before returning it waits on
 * h_event_end_decoding.
 * NOTE(review): the worker threads presumably decode the rows in between;
 * their code is not part of this function.
 *
 * pbi  decoder instance (frame buffers, per-row sync state, mt_* buffers).
 * xd   macroblock descriptor used by the calling thread; its
 *      mode_info_context is advanced as rows are consumed.
 *
 * When CONFIG_MULTITHREAD is not set this function does nothing.
 */
void vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd)
{
#if CONFIG_MULTITHREAD
    int mb_row;
    VP8_COMMON *pc = &pbi->common;

    int num_part = 1 << pbi->common.multi_token_partition;
    int i;
    volatile int *last_row_current_mb_col = NULL;
    int nsync = pbi->sync_range;

    /* Only assigned and read while pbi->common.filter_level is non-zero. */
    int filter_level;
    loop_filter_info *lfi = pc->lf_info;
    int alt_flt_enabled = xd->segmentation_enabled;
    int Segment;

    if (pbi->common.filter_level)
    {
        /* Set above_row buffer to 127 for decoding first MB row */
        vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, pc->yv12_fb[pc->lst_fb_idx].y_width + 5);
        vpx_memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5);
        vpx_memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5);

        /* For every other row only the single entry just before the first
         * pixel (the above-left position) is preset, to 129.
         */
        for (i = 1; i < pc->mb_rows; i++)
        {
            vpx_memset(pbi->mt_yabove_row[i] + VP8BORDERINPIXELS-1, (unsigned char)129, 1);
            vpx_memset(pbi->mt_uabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
            vpx_memset(pbi->mt_vabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
        }

        /* Set left_col to 129 initially */
        for (i = 0; i < pc->mb_rows; i++)
        {
            vpx_memset(pbi->mt_yleft_col[i], (unsigned char)129, 16);
            vpx_memset(pbi->mt_uleft_col[i], (unsigned char)129, 8);
            vpx_memset(pbi->mt_vleft_col[i], (unsigned char)129, 8);
        }
        vp8mt_lpf_init(pbi, pc->filter_level);
    }

    vp8_setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count);

    /* Signal every worker that a new frame is ready. */
    for (i = 0; i < pbi->decoding_thread_count; i++)
        sem_post(&pbi->h_event_start_decoding[i]);

    for (mb_row = 0; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
    {
        /* Token partition is selected by row index. */
        xd->current_bc = &pbi->mbc[mb_row%num_part];

        /* vp8_decode_mb_row(pbi, pc, mb_row, xd); */
        {
            int recon_yoffset, recon_uvoffset;
            int mb_col;
            int ref_fb_idx = pc->lst_fb_idx;
            int dst_fb_idx = pc->new_fb_idx;
            int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
            int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;

            /* Progress counter of the row directly above; only dereferenced
             * when mb_row > 0.
             */
            if (mb_row > 0)
                last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row -1];

            vpx_memset(&pc->left_context, 0, sizeof(pc->left_context));
            recon_yoffset = mb_row * recon_y_stride * 16;
            recon_uvoffset = mb_row * recon_uv_stride * 8;
            /* reset above block coeffs */

            xd->above_context = pc->above_context;
            xd->up_available = (mb_row != 0);

            /* Shift first, then negate: left-shifting a negative value is
             * undefined behaviour in C. Same form as mb_to_left_edge below.
             */
            xd->mb_to_top_edge = -((mb_row * 16) << 3);
            xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;

            for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
            {
                /* Every nsync columns, spin until the row above is at least
                 * nsync columns ahead of this one or has reached its last
                 * column.
                 * NOTE(review): the mask test assumes nsync is a power of
                 * two - confirm where sync_range is set.
                 */
                if (mb_row > 0 && (mb_col & (nsync-1)) == 0)
                {
                    while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1)
                    {
                        x86_pause_hint();
                        thread_sleep(0);
                    }
                }

                /* Per-block mode info is only copied for modes that carry it. */
                if (xd->mode_info_context->mbmi.mode == SPLITMV || xd->mode_info_context->mbmi.mode == B_PRED)
                {
                    for (i = 0; i < 16; i++)
                    {
                        BLOCKD *d = &xd->block[i];
                        vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO));
                    }
                }

                if (pbi->common.filter_level)
                {
                    /* update loopfilter info */
                    Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
                    filter_level = pbi->mt_baseline_filter_level[Segment];
                    /* Apply any context driven MB level adjustment */
                    vp8_adjust_mb_lf_value(xd, &filter_level);
                }

                /* Distance of Mb to the various image edges.
                 * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
                 */
                xd->mb_to_left_edge = -((mb_col * 16) << 3);
                xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;

                xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
                xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
                xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;

                xd->left_available = (mb_col != 0);

                /* Select the appropriate reference frame for this MB */
                if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
                    ref_fb_idx = pc->lst_fb_idx;
                else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
                    ref_fb_idx = pc->gld_fb_idx;
                else
                    ref_fb_idx = pc->alt_fb_idx;

                xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
                xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
                xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;

                vp8_build_uvmvs(xd, pc->full_pixel);
                vp8mt_decode_macroblock(pbi, xd, mb_row, mb_col);

                if (pbi->common.filter_level)
                {
                    /* Save decoded MB last row data for next-row decoding.
                     * This is done before the loop filter below modifies the
                     * destination pixels.
                     * NOTE(review): the 32 / 16 offsets presumably equal
                     * VP8BORDERINPIXELS and VP8BORDERINPIXELS>>1 - confirm.
                     */
                    if (mb_row != pc->mb_rows-1)
                    {
                        vpx_memcpy((pbi->mt_yabove_row[mb_row +1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
                        vpx_memcpy((pbi->mt_uabove_row[mb_row +1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
                        vpx_memcpy((pbi->mt_vabove_row[mb_row +1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
                    }

                    /* save left_col for next MB decoding; only done when the
                     * frame is a key frame or the next MB is intra coded
                     */
                    if (mb_col != pc->mb_cols-1)
                    {
                        MODE_INFO *next = xd->mode_info_context +1;

                        if (xd->frame_type == KEY_FRAME  ||  next->mbmi.ref_frame == INTRA_FRAME)
                        {
                            for (i = 0; i < 16; i++)
                                pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer [i* recon_y_stride + 15];
                            for (i = 0; i < 8; i++)
                            {
                                pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer [i* recon_uv_stride + 7];
                                pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer [i* recon_uv_stride + 7];
                            }
                        }
                    }

                    /* loopfilter on this macroblock. */
                    if (filter_level)
                    {
                        /* no left MB edge to filter in the first column */
                        if (mb_col > 0)
                            pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);

                        if (xd->mode_info_context->mbmi.dc_diff > 0)
                            pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);

                        /* don't apply across umv border */
                        if (mb_row > 0)
                            pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);

                        if (xd->mode_info_context->mbmi.dc_diff > 0)
                            pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
                    }
                }

                recon_yoffset += 16;
                recon_uvoffset += 8;

                ++xd->mode_info_context;  /* next mb */

                xd->above_context++;

                /* Publish progress; the row below polls this value. */
                pbi->mt_current_mb_col[mb_row] = mb_col;
            }

            /* adjust to the next row of mbs */
            if (pbi->common.filter_level)
            {
                if (mb_row != pc->mb_rows-1)
                {
                    int lasty = pc->yv12_fb[ref_fb_idx].y_width + VP8BORDERINPIXELS;
                    int lastuv = (pc->yv12_fb[ref_fb_idx].y_width>>1) + (VP8BORDERINPIXELS>>1);

                    /* Replicate the last saved pixel into the 4 entries past
                     * the right end of the next row's above buffers.
                     */
                    for (i = 0; i < 4; i++)
                    {
                        pbi->mt_yabove_row[mb_row +1][lasty + i] = pbi->mt_yabove_row[mb_row +1][lasty -1];
                        pbi->mt_uabove_row[mb_row +1][lastuv + i] = pbi->mt_uabove_row[mb_row +1][lastuv -1];
                        pbi->mt_vabove_row[mb_row +1][lastuv + i] = pbi->mt_vabove_row[mb_row +1][lastuv -1];
                    }
                }
            }
            else
                vp8_extend_mb_row(&pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);

            ++xd->mode_info_context;      /* skip prediction column */
        }

        /* Skip the mode info of the next decoding_thread_count rows, which
         * this thread does not decode.
         */
        xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
    }

    sem_wait(&pbi->h_event_end_decoding);   /* add back for each frame */
#else
    (void) pbi;
    (void) xd;
#endif
}
984