/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "onyx_int.h"
#include "vp8/common/threading.h"
#include "vp8/common/common.h"
#include "vp8/common/extend.h"
#include "bitstream.h"
#include "encodeframe.h"

#if CONFIG_MULTITHREAD

extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x,
                                    int ok_to_skip);

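/* Loop filter worker: on each h_event_start_lpf signal, runs the loop
 * filter over the reconstructed frame and posts h_event_end_lpf.
 * Exits when b_multi_threaded is cleared.
 */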
static THREAD_FUNCTION thread_loopfilter(void *p_data) {
  VP8_COMP *cpi = (VP8_COMP *)(((LPFTHREAD_DATA *)p_data)->ptr1);
  VP8_COMMON *cm = &cpi->common;

  while (1) {
    if (protected_read(&cpi->mt_mutex, &cpi->b_multi_threaded) == 0) break;

    if (sem_wait(&cpi->h_event_start_lpf) == 0) {
      /* we're shutting down */
      if (protected_read(&cpi->mt_mutex, &cpi->b_multi_threaded) == 0) break;

      vp8_loopfilter_frame(cpi, cm);

      sem_post(&cpi->h_event_end_lpf);
    }
  }

  return 0;
}

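/* Encoding worker: on each h_event_start_encoding signal, encodes its
 * share of macroblock rows, staying in lock-step with the row above
 * via cpi->mt_current_mb_col, then posts h_event_end_encoding.  Exits
 * when b_multi_threaded is cleared.
 */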
static THREAD_FUNCTION thread_encoding_proc(void *p_data) {
  int ithread = ((ENCODETHREAD_DATA *)p_data)->ithread;
  VP8_COMP *cpi = (VP8_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr1);
  MB_ROW_COMP *mbri = (MB_ROW_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr2);
  ENTROPY_CONTEXT_PLANES mb_row_left_context;

  while (1) {
    if (protected_read(&cpi->mt_mutex, &cpi->b_multi_threaded) == 0) break;

    if (sem_wait(&cpi->h_event_start_encoding[ithread]) == 0) {
      const int nsync = cpi->mt_sync_range;
      VP8_COMMON *cm = &cpi->common;
      int mb_row;
      MACROBLOCK *x = &mbri->mb;
      MACROBLOCKD *xd = &x->e_mbd;
      TOKENEXTRA *tp;
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
      TOKENEXTRA *tp_start = cpi->tok + (1 + ithread) * (16 * 24);
      const int num_part = (1 << cm->multi_token_partition);
#endif

      int *segment_counts = mbri->segment_counts;
      int *totalrate = &mbri->totalrate;

      /* we're shutting down */
      if (protected_read(&cpi->mt_mutex, &cpi->b_multi_threaded) == 0) break;

      xd->mode_info_context = cm->mi + cm->mode_info_stride * (ithread + 1);
      xd->mode_info_stride = cm->mode_info_stride;

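      /* Rows are interleaved across threads: this thread takes rows
       * ithread + 1, ithread + 1 + (encoding_thread_count + 1), and so
       * on; the main thread encodes the remaining rows.
       */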
      for (mb_row = ithread + 1; mb_row < cm->mb_rows;
           mb_row += (cpi->encoding_thread_count + 1)) {
        int recon_yoffset, recon_uvoffset;
        int mb_col;
        int ref_fb_idx = cm->lst_fb_idx;
        int dst_fb_idx = cm->new_fb_idx;
        int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
        int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
        int map_index = (mb_row * cm->mb_cols);
        const int *last_row_current_mb_col;
        int *current_mb_col = &cpi->mt_current_mb_col[mb_row];

#if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
        vp8_writer *w = &cpi->bc[1 + (mb_row % num_part)];
#else
        tp = cpi->tok + (mb_row * (cm->mb_cols * 16 * 24));
        cpi->tplist[mb_row].start = tp;
#endif

        last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1];

        /* reset above block coeffs */
        xd->above_context = cm->above_context;
        xd->left_context = &mb_row_left_context;

        vp8_zero(mb_row_left_context);

        xd->up_available = (mb_row != 0);
        recon_yoffset = (mb_row * recon_y_stride * 16);
        recon_uvoffset = (mb_row * recon_uv_stride * 8);

        /* Set the mb activity pointer to the start of the row. */
        x->mb_activity_ptr = &cpi->mb_activity_map[map_index];

        /* for each macroblock col in image */
        for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) {
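          /* Row synchronization: publish our progress every nsync
           * columns so the thread handling the row below can advance,
           * and wait until the row above is at least nsync columns
           * ahead.  nsync is assumed to be a power of two since it is
           * used as a mask below.
           */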
          if (((mb_col - 1) % nsync) == 0) {
            pthread_mutex_t *mutex = &cpi->pmutex[mb_row];
            protected_write(mutex, current_mb_col, mb_col - 1);
          }

          if (mb_row && !(mb_col & (nsync - 1))) {
            pthread_mutex_t *mutex = &cpi->pmutex[mb_row - 1];
            sync_read(mutex, mb_col, last_row_current_mb_col, nsync);
          }

#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
          tp = tp_start;
#endif

          /* Distance of MB to the various image edges.
           * These are specified in 1/8th pel units as they are always
           * compared to values in 1/8th pel units.
           */
          xd->mb_to_left_edge = -((mb_col * 16) << 3);
          xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3;
          xd->mb_to_top_edge = -((mb_row * 16) << 3);
          xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3;

          /* Set up limit values for motion vectors used to prevent
           * them extending outside the UMV borders
           */
          x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
          x->mv_col_max =
              ((cm->mb_cols - 1 - mb_col) * 16) + (VP8BORDERINPIXELS - 16);
          x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
          x->mv_row_max =
              ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16);

          xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
          xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
          xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
          xd->left_available = (mb_col != 0);

          x->rddiv = cpi->RDDIV;
          x->rdmult = cpi->RDMULT;

          /* Copy current mb to a buffer */
          vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16);

          if (cpi->oxcf.tuning == VP8_TUNE_SSIM) vp8_activity_masking(cpi, x);

          /* Is segmentation enabled */
          /* MB level adjustment to quantizer */
          if (xd->segmentation_enabled) {
            /* Code to set segment id in xd->mbmi.segment_id for
             * current MB (with range checking)
             */
            if (cpi->segmentation_map[map_index + mb_col] <= 3) {
              xd->mode_info_context->mbmi.segment_id =
                  cpi->segmentation_map[map_index + mb_col];
            } else {
              xd->mode_info_context->mbmi.segment_id = 0;
            }

            vp8cx_mb_init_quantizer(cpi, x, 1);
          } else {
            /* Set to Segment 0 by default */
            xd->mode_info_context->mbmi.segment_id = 0;
          }

          x->active_ptr = cpi->active_map + map_index + mb_col;

          if (cm->frame_type == KEY_FRAME) {
            *totalrate += vp8cx_encode_intra_macroblock(cpi, x, &tp);
#ifdef MODE_STATS
            y_modes[xd->mbmi.mode]++;
#endif
          } else {
            *totalrate += vp8cx_encode_inter_macroblock(
                cpi, x, &tp, recon_yoffset, recon_uvoffset, mb_row, mb_col);

#ifdef MODE_STATS
            inter_y_modes[xd->mbmi.mode]++;

            if (xd->mbmi.mode == SPLITMV) {
              int b;

              for (b = 0; b < xd->mbmi.partition_count; ++b) {
                inter_b_modes[x->partition->bmi[b].mode]++;
              }
            }

#endif
            // Keep track of how many (consecutive) times a block
            // is coded as ZEROMV_LASTREF, for base layer frames.
            // Reset to 0 if it's coded as anything else.
            if (cpi->current_layer == 0) {
              if (xd->mode_info_context->mbmi.mode == ZEROMV &&
                  xd->mode_info_context->mbmi.ref_frame == LAST_FRAME) {
                // Increment, check for wrap-around.
                if (cpi->consec_zero_last[map_index + mb_col] < 255) {
                  cpi->consec_zero_last[map_index + mb_col] += 1;
                }
                if (cpi->consec_zero_last_mvbias[map_index + mb_col] < 255) {
                  cpi->consec_zero_last_mvbias[map_index + mb_col] += 1;
                }
              } else {
                cpi->consec_zero_last[map_index + mb_col] = 0;
                cpi->consec_zero_last_mvbias[map_index + mb_col] = 0;
              }
              if (x->zero_last_dot_suppress) {
                cpi->consec_zero_last_mvbias[map_index + mb_col] = 0;
              }
            }

            /* Special case code for cyclic refresh.
             * If cyclic update is enabled then copy
             * xd->mbmi.segment_id (which may have been updated
             * based on mode during
             * vp8cx_encode_inter_macroblock()) back into the
             * global segmentation map.
             */
            if ((cpi->current_layer == 0) &&
                (cpi->cyclic_refresh_mode_enabled &&
                 xd->segmentation_enabled)) {
              const MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
              cpi->segmentation_map[map_index + mb_col] = mbmi->segment_id;

              /* If the block has been refreshed, mark it as clean
               * (the magnitude of the -ve value influences how long
               * it will be before we consider another refresh).
               * Else if it was coded (last frame 0,0) and has
               * not already been refreshed then mark it as a
               * candidate for cleanup next time (marked 0), else
               * mark it as dirty (1).
               */
              if (mbmi->segment_id) {
                cpi->cyclic_refresh_map[map_index + mb_col] = -1;
              } else if ((mbmi->mode == ZEROMV) &&
                         (mbmi->ref_frame == LAST_FRAME)) {
                if (cpi->cyclic_refresh_map[map_index + mb_col] == 1) {
                  cpi->cyclic_refresh_map[map_index + mb_col] = 0;
                }
              } else {
                cpi->cyclic_refresh_map[map_index + mb_col] = 1;
              }
            }
          }

#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
          /* pack tokens for this MB */
          {
            int tok_count = tp - tp_start;
            vp8_pack_tokens(w, tp_start, tok_count);
          }
#else
          cpi->tplist[mb_row].stop = tp;
#endif
          /* Increment pointer into gf usage flags structure. */
          x->gf_active_ptr++;

          /* Increment the activity mask pointers. */
          x->mb_activity_ptr++;

          /* adjust to the next column of macroblocks */
          x->src.y_buffer += 16;
          x->src.u_buffer += 8;
          x->src.v_buffer += 8;

          recon_yoffset += 16;
          recon_uvoffset += 8;

          /* Keep track of segment usage */
          segment_counts[xd->mode_info_context->mbmi.segment_id]++;

          /* skip to next mb */
          xd->mode_info_context++;
          x->partition_info++;
          xd->above_context++;
        }

        vp8_extend_mb_row(&cm->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16,
                          xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);

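        /* Mark this row complete: writing mb_col + nsync ensures the
         * thread encoding the row below never blocks on it again.
         */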
        protected_write(&cpi->pmutex[mb_row], current_mb_col, mb_col + nsync);

        /* this is to account for the border */
        xd->mode_info_context++;
        x->partition_info++;

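        /* Advance the source pointers past the rows assigned to the
         * other threads, to the start of this thread's next row.
         */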
        x->src.y_buffer +=
            16 * x->src.y_stride * (cpi->encoding_thread_count + 1) -
            16 * cm->mb_cols;
        x->src.u_buffer +=
            8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) -
            8 * cm->mb_cols;
        x->src.v_buffer +=
            8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) -
            8 * cm->mb_cols;

        xd->mode_info_context +=
            xd->mode_info_stride * cpi->encoding_thread_count;
        x->partition_info += xd->mode_info_stride * cpi->encoding_thread_count;
        x->gf_active_ptr += cm->mb_cols * cpi->encoding_thread_count;
      }
      /* Signal that this thread has completed processing its rows. */
      sem_post(&cpi->h_event_end_encoding[ithread]);
    }
  }

  /* printf("exit thread %d\n", ithread); */
  return 0;
}

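/* Copy the per-thread encoding state a worker needs (search and
 * rate-distortion costs, quantizer settings, function pointers) from
 * the main encoder's MACROBLOCK into a thread's private copy.
 */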
static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) {
  MACROBLOCK *x = mbsrc;
  MACROBLOCK *z = mbdst;
  int i;

  z->ss = x->ss;
  z->ss_count = x->ss_count;
  z->searches_per_step = x->searches_per_step;
  z->errorperbit = x->errorperbit;

  z->sadperbit16 = x->sadperbit16;
  z->sadperbit4 = x->sadperbit4;

  /*
  z->mv_col_min    = x->mv_col_min;
  z->mv_col_max    = x->mv_col_max;
  z->mv_row_min    = x->mv_row_min;
  z->mv_row_max    = x->mv_row_max;
  */

  z->short_fdct4x4 = x->short_fdct4x4;
  z->short_fdct8x4 = x->short_fdct8x4;
  z->short_walsh4x4 = x->short_walsh4x4;
  z->quantize_b = x->quantize_b;
  z->optimize = x->optimize;

  /*
  z->mvc              = x->mvc;
  z->src.y_buffer      = x->src.y_buffer;
  z->src.u_buffer      = x->src.u_buffer;
  z->src.v_buffer      = x->src.v_buffer;
  */

  z->mvcost[0] = x->mvcost[0];
  z->mvcost[1] = x->mvcost[1];
  z->mvsadcost[0] = x->mvsadcost[0];
  z->mvsadcost[1] = x->mvsadcost[1];

  z->token_costs = x->token_costs;
  z->inter_bmode_costs = x->inter_bmode_costs;
  z->mbmode_cost = x->mbmode_cost;
  z->intra_uv_mode_cost = x->intra_uv_mode_cost;
  z->bmode_costs = x->bmode_costs;

  for (i = 0; i < 25; ++i) {
    z->block[i].quant = x->block[i].quant;
    z->block[i].quant_fast = x->block[i].quant_fast;
    z->block[i].quant_shift = x->block[i].quant_shift;
    z->block[i].zbin = x->block[i].zbin;
    z->block[i].zrun_zbin_boost = x->block[i].zrun_zbin_boost;
    z->block[i].round = x->block[i].round;
    z->block[i].src_stride = x->block[i].src_stride;
  }

  z->q_index = x->q_index;
  z->act_zbin_adj = x->act_zbin_adj;
  z->last_act_zbin_adj = x->last_act_zbin_adj;

  {
    MACROBLOCKD *xd = &x->e_mbd;
    MACROBLOCKD *zd = &z->e_mbd;

    /*
    zd->mode_info_context = xd->mode_info_context;
    zd->mode_info        = xd->mode_info;

    zd->mode_info_stride  = xd->mode_info_stride;
    zd->frame_type       = xd->frame_type;
    zd->up_available     = xd->up_available   ;
    zd->left_available   = xd->left_available;
    zd->left_context     = xd->left_context;
    zd->last_frame_dc     = xd->last_frame_dc;
    zd->last_frame_dccons = xd->last_frame_dccons;
    zd->gold_frame_dc     = xd->gold_frame_dc;
    zd->gold_frame_dccons = xd->gold_frame_dccons;
    zd->mb_to_left_edge    = xd->mb_to_left_edge;
    zd->mb_to_right_edge   = xd->mb_to_right_edge;
    zd->mb_to_top_edge     = xd->mb_to_top_edge   ;
    zd->mb_to_bottom_edge  = xd->mb_to_bottom_edge;
    zd->gf_active_ptr     = xd->gf_active_ptr;
    zd->frames_since_golden       = xd->frames_since_golden;
    zd->frames_till_alt_ref_frame   = xd->frames_till_alt_ref_frame;
    */
    zd->subpixel_predict = xd->subpixel_predict;
    zd->subpixel_predict8x4 = xd->subpixel_predict8x4;
    zd->subpixel_predict8x8 = xd->subpixel_predict8x8;
    zd->subpixel_predict16x16 = xd->subpixel_predict16x16;
    zd->segmentation_enabled = xd->segmentation_enabled;
    zd->mb_segement_abs_delta = xd->mb_segement_abs_delta;
    memcpy(zd->segment_feature_data, xd->segment_feature_data,
           sizeof(xd->segment_feature_data));

    memcpy(zd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc));
    memcpy(zd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1));
    memcpy(zd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2));
    memcpy(zd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv));

#if 1
    /* TODO: Remove dequant from BLOCKD.  This is a temporary solution
     * until the quantizer code uses a passed-in pointer to the dequant
     * constants.  This will also require modifications to the x86 and
     * neon assembly.
     */
    for (i = 0; i < 16; ++i) zd->block[i].dequant = zd->dequant_y1;
    for (i = 16; i < 24; ++i) zd->block[i].dequant = zd->dequant_uv;
    zd->block[24].dequant = zd->dequant_y2;
#endif

    memcpy(z->rd_threshes, x->rd_threshes, sizeof(x->rd_threshes));
    memcpy(z->rd_thresh_mult, x->rd_thresh_mult, sizeof(x->rd_thresh_mult));

    z->zbin_over_quant = x->zbin_over_quant;
    z->zbin_mode_boost_enabled = x->zbin_mode_boost_enabled;
    z->zbin_mode_boost = x->zbin_mode_boost;

    memset(z->error_bins, 0, sizeof(z->error_bins));
  }
}

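/* Initialize the per-thread row contexts: point each worker's
 * MACROBLOCK at its first assigned row in the source and
 * reconstruction buffers and reset its per-frame counts.
 */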
void vp8cx_init_mbrthread_data(VP8_COMP *cpi, MACROBLOCK *x,
                               MB_ROW_COMP *mbr_ei, int count) {
  VP8_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  int i;

  for (i = 0; i < count; ++i) {
    MACROBLOCK *mb = &mbr_ei[i].mb;
    MACROBLOCKD *mbd = &mb->e_mbd;

    mbd->subpixel_predict = xd->subpixel_predict;
    mbd->subpixel_predict8x4 = xd->subpixel_predict8x4;
    mbd->subpixel_predict8x8 = xd->subpixel_predict8x8;
    mbd->subpixel_predict16x16 = xd->subpixel_predict16x16;
    mb->gf_active_ptr = x->gf_active_ptr;

    memset(mbr_ei[i].segment_counts, 0, sizeof(mbr_ei[i].segment_counts));
    mbr_ei[i].totalrate = 0;

    mb->partition_info = x->pi + x->e_mbd.mode_info_stride * (i + 1);

    mbd->frame_type = cm->frame_type;

    mb->src = *cpi->Source;
    mbd->pre = cm->yv12_fb[cm->lst_fb_idx];
    mbd->dst = cm->yv12_fb[cm->new_fb_idx];

    mb->src.y_buffer += 16 * x->src.y_stride * (i + 1);
    mb->src.u_buffer += 8 * x->src.uv_stride * (i + 1);
    mb->src.v_buffer += 8 * x->src.uv_stride * (i + 1);

    vp8_build_block_offsets(mb);

    mbd->left_context = &cm->left_context;
    mb->mvc = cm->fc.mvc;

    setup_mbby_copy(&mbr_ei[i].mb, x);

    mbd->fullpixel_mask = 0xffffffff;
    if (cm->full_pixel) mbd->fullpixel_mask = 0xfffffff8;

    vp8_zero(mb->coef_counts);
    vp8_zero(x->ymode_count);
    mb->skip_true_count = 0;
    vp8_zero(mb->MVcount);
    mb->prediction_error = 0;
    mb->intra_error = 0;
    vp8_zero(mb->count_mb_ref_frame_usage);
    mb->mbs_tested_so_far = 0;
    mb->mbs_zero_last_dot_suppress = 0;
  }
}

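/* Create the encoding worker threads and the loop filter thread.
 * Returns 0 on success (including when encoding stays single
 * threaded), -1 if an encoding thread could not be created, -2 if the
 * loop filter thread could not be created; partially created
 * resources are released before returning on failure.
 */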
int vp8cx_create_encoder_threads(VP8_COMP *cpi) {
  const VP8_COMMON *cm = &cpi->common;

  cpi->b_multi_threaded = 0;
  cpi->encoding_thread_count = 0;
  cpi->b_lpf_running = 0;

  pthread_mutex_init(&cpi->mt_mutex, NULL);

  if (cm->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1) {
    int ithread;
    int th_count = cpi->oxcf.multi_threaded - 1;
    int rc = 0;

    /* don't allocate more threads than cores available */
    if (cpi->oxcf.multi_threaded > cm->processor_core_count) {
      th_count = cm->processor_core_count - 1;
    }

    /* we have th_count + 1 (main) threads processing one row each */
    /* no point to have more threads than the sync range allows */
    if (th_count > ((cm->mb_cols / cpi->mt_sync_range) - 1)) {
      th_count = (cm->mb_cols / cpi->mt_sync_range) - 1;
    }

    if (th_count == 0) return 0;

    CHECK_MEM_ERROR(cpi->h_encoding_thread,
                    vpx_malloc(sizeof(pthread_t) * th_count));
    CHECK_MEM_ERROR(cpi->h_event_start_encoding,
                    vpx_malloc(sizeof(sem_t) * th_count));
    CHECK_MEM_ERROR(cpi->h_event_end_encoding,
                    vpx_malloc(sizeof(sem_t) * th_count));
    CHECK_MEM_ERROR(cpi->mb_row_ei,
                    vpx_memalign(32, sizeof(MB_ROW_COMP) * th_count));
    memset(cpi->mb_row_ei, 0, sizeof(MB_ROW_COMP) * th_count);
    CHECK_MEM_ERROR(cpi->en_thread_data,
                    vpx_malloc(sizeof(ENCODETHREAD_DATA) * th_count));

    cpi->b_multi_threaded = 1;
    cpi->encoding_thread_count = th_count;

    /*
    printf("[VP8:] multi_threaded encoding is enabled with %d threads\n\n",
           (cpi->encoding_thread_count +1));
    */

    for (ithread = 0; ithread < th_count; ++ithread) {
      ENCODETHREAD_DATA *ethd = &cpi->en_thread_data[ithread];

      /* Setup block ptrs and offsets */
      vp8_setup_block_ptrs(&cpi->mb_row_ei[ithread].mb);
      vp8_setup_block_dptrs(&cpi->mb_row_ei[ithread].mb.e_mbd);

      sem_init(&cpi->h_event_start_encoding[ithread], 0, 0);
      sem_init(&cpi->h_event_end_encoding[ithread], 0, 0);

      ethd->ithread = ithread;
      ethd->ptr1 = (void *)cpi;
      ethd->ptr2 = (void *)&cpi->mb_row_ei[ithread];

      rc = pthread_create(&cpi->h_encoding_thread[ithread], 0,
                          thread_encoding_proc, ethd);
      if (rc) break;
    }

    if (rc) {
      /* shutdown other threads */
      protected_write(&cpi->mt_mutex, &cpi->b_multi_threaded, 0);
      for (--ithread; ithread >= 0; ithread--) {
        pthread_join(cpi->h_encoding_thread[ithread], 0);
        sem_destroy(&cpi->h_event_start_encoding[ithread]);
        sem_destroy(&cpi->h_event_end_encoding[ithread]);
      }

      /* free thread related resources */
      vpx_free(cpi->h_event_start_encoding);
      vpx_free(cpi->h_event_end_encoding);
      vpx_free(cpi->h_encoding_thread);
      vpx_free(cpi->mb_row_ei);
      vpx_free(cpi->en_thread_data);

      pthread_mutex_destroy(&cpi->mt_mutex);

      return -1;
    }

    {
      LPFTHREAD_DATA *lpfthd = &cpi->lpf_thread_data;

      sem_init(&cpi->h_event_start_lpf, 0, 0);
      sem_init(&cpi->h_event_end_lpf, 0, 0);

      lpfthd->ptr1 = (void *)cpi;
      rc = pthread_create(&cpi->h_filter_thread, 0, thread_loopfilter, lpfthd);

      if (rc) {
        /* shutdown other threads */
        protected_write(&cpi->mt_mutex, &cpi->b_multi_threaded, 0);
        for (--ithread; ithread >= 0; ithread--) {
          sem_post(&cpi->h_event_start_encoding[ithread]);
          sem_post(&cpi->h_event_end_encoding[ithread]);
          pthread_join(cpi->h_encoding_thread[ithread], 0);
          sem_destroy(&cpi->h_event_start_encoding[ithread]);
          sem_destroy(&cpi->h_event_end_encoding[ithread]);
        }
        sem_destroy(&cpi->h_event_end_lpf);
        sem_destroy(&cpi->h_event_start_lpf);

        /* free thread related resources */
        vpx_free(cpi->h_event_start_encoding);
        vpx_free(cpi->h_event_end_encoding);
        vpx_free(cpi->h_encoding_thread);
        vpx_free(cpi->mb_row_ei);
        vpx_free(cpi->en_thread_data);

        pthread_mutex_destroy(&cpi->mt_mutex);

        return -2;
      }
    }
  }
  return 0;
}

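/* Shut down and join all worker threads: clear b_multi_threaded, wake
 * each thread so it can observe the flag, join it, then destroy the
 * semaphores and free the thread-related allocations.
 */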
void vp8cx_remove_encoder_threads(VP8_COMP *cpi) {
  if (protected_read(&cpi->mt_mutex, &cpi->b_multi_threaded)) {
    /* shutdown other threads */
    protected_write(&cpi->mt_mutex, &cpi->b_multi_threaded, 0);
    {
      int i;

      for (i = 0; i < cpi->encoding_thread_count; ++i) {
        sem_post(&cpi->h_event_start_encoding[i]);
        sem_post(&cpi->h_event_end_encoding[i]);

        pthread_join(cpi->h_encoding_thread[i], 0);

        sem_destroy(&cpi->h_event_start_encoding[i]);
        sem_destroy(&cpi->h_event_end_encoding[i]);
      }

      sem_post(&cpi->h_event_start_lpf);
      pthread_join(cpi->h_filter_thread, 0);
    }

    sem_destroy(&cpi->h_event_end_lpf);
    sem_destroy(&cpi->h_event_start_lpf);

    /* free thread related resources */
    vpx_free(cpi->h_event_start_encoding);
    vpx_free(cpi->h_event_end_encoding);
    vpx_free(cpi->h_encoding_thread);
    vpx_free(cpi->mb_row_ei);
    vpx_free(cpi->en_thread_data);
  }
  pthread_mutex_destroy(&cpi->mt_mutex);
}
#endif