/*
 *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "vp9/encoder/vp9_encodeframe.h"
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_ethread.h"
#include "vp9/encoder/vp9_firstpass.h"
#include "vp9/encoder/vp9_multi_thread.h"
#include "vp9/encoder/vp9_temporal_filter.h"
#include "vpx_dsp/vpx_dsp_common.h"
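
// Accumulates the rate-distortion statistics gathered by a worker thread
// (td_t) into the owning thread data (td): compound-prediction and
// switchable-filter rate deltas, plus the full coefficient token count table.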
static void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) {
  int i, j, k, l, m, n;

  for (i = 0; i < REFERENCE_MODES; i++)
    td->rd_counts.comp_pred_diff[i] += td_t->rd_counts.comp_pred_diff[i];

  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
    td->rd_counts.filter_diff[i] += td_t->rd_counts.filter_diff[i];

  for (i = 0; i < TX_SIZES; i++)
    for (j = 0; j < PLANE_TYPES; j++)
      for (k = 0; k < REF_TYPES; k++)
        for (l = 0; l < COEF_BANDS; l++)
          for (m = 0; m < COEFF_CONTEXTS; m++)
            for (n = 0; n < ENTROPY_TOKENS; n++)
              td->rd_counts.coef_counts[i][j][k][l][m][n] +=
                  td_t->rd_counts.coef_counts[i][j][k][l][m][n];
}
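
// Worker hook for tile-based encoding. Tiles are numbered in raster order
// and distributed round-robin: each worker starts at its own index and
// strides by the total worker count.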
static int enc_worker_hook(EncWorkerData *const thread_data, void *unused) {
  VP9_COMP *const cpi = thread_data->cpi;
  const VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int tile_rows = 1 << cm->log2_tile_rows;
  int t;

  (void)unused;

  for (t = thread_data->start; t < tile_rows * tile_cols;
       t += cpi->num_workers) {
    int tile_row = t / tile_cols;
    int tile_col = t % tile_cols;

    vp9_encode_tile(cpi, thread_data->td, tile_row, tile_col);
  }

  return 0;
}
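
// Returns the number of tile columns the current configuration will produce:
// the requested tile_columns setting, clamped to the range permitted for the
// frame width.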
static int get_max_tile_cols(VP9_COMP *cpi) {
  const int aligned_width = ALIGN_POWER_OF_TWO(cpi->oxcf.width, MI_SIZE_LOG2);
  int mi_cols = aligned_width >> MI_SIZE_LOG2;
  int min_log2_tile_cols, max_log2_tile_cols;
  int log2_tile_cols;

  vp9_get_tile_n_bits(mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
  log2_tile_cols =
      clamp(cpi->oxcf.tile_columns, min_log2_tile_cols, max_log2_tile_cols);
  return (1 << log2_tile_cols);
}
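
// Creates the worker threads and their thread data on first use; later calls
// return immediately since cpi->num_workers is already non-zero. The last
// worker slot is taken by the main thread itself, which reuses cpi->td rather
// than allocating a private ThreadData.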
static void create_enc_workers(VP9_COMP *cpi, int num_workers) {
  VP9_COMMON *const cm = &cpi->common;
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
  int i;

  // Only run once to create threads and allocate thread data.
  if (cpi->num_workers == 0) {
    int allocated_workers = num_workers;

    // When using SVC, threads must be allocated for the highest resolution.
    // When row-based multithreading is enabled, it is OK to allocate more
    // threads than the number of max tile columns.
    if (cpi->use_svc && !cpi->row_mt) {
      int max_tile_cols = get_max_tile_cols(cpi);
      allocated_workers = VPXMIN(cpi->oxcf.max_threads, max_tile_cols);
    }

    CHECK_MEM_ERROR(cm, cpi->workers,
                    vpx_malloc(allocated_workers * sizeof(*cpi->workers)));

    CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
                    vpx_calloc(allocated_workers, sizeof(*cpi->tile_thr_data)));

    for (i = 0; i < allocated_workers; i++) {
      VPxWorker *const worker = &cpi->workers[i];
      EncWorkerData *thread_data = &cpi->tile_thr_data[i];

      ++cpi->num_workers;
      winterface->init(worker);

      if (i < allocated_workers - 1) {
        thread_data->cpi = cpi;

        // Allocate thread data.
        CHECK_MEM_ERROR(cm, thread_data->td,
                        vpx_memalign(32, sizeof(*thread_data->td)));
        vp9_zero(*thread_data->td);

        // Set up pc_tree.
        thread_data->td->leaf_tree = NULL;
        thread_data->td->pc_tree = NULL;
        vp9_setup_pc_tree(cm, thread_data->td);

        // Allocate frame counters in thread data.
        CHECK_MEM_ERROR(cm, thread_data->td->counts,
                        vpx_calloc(1, sizeof(*thread_data->td->counts)));

        // Create threads.
        if (!winterface->reset(worker))
          vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                             "Tile encoder thread creation failed");
      } else {
        // Main thread acts as a worker and uses the thread data in cpi.
        thread_data->cpi = cpi;
        thread_data->td = &cpi->td;
      }
      winterface->sync(worker);
    }
  }
}
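
// Points each worker at its hook and per-thread data, then runs the frame:
// all but the last worker are launched asynchronously, the last one is
// executed on the calling thread, and a final sync waits for completion.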
static void launch_enc_workers(VP9_COMP *cpi, VPxWorkerHook hook, void *data2,
                               int num_workers) {
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
  int i;

  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    worker->hook = (VPxWorkerHook)hook;
    worker->data1 = &cpi->tile_thr_data[i];
    worker->data2 = data2;
  }

  // Encode a frame.
  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;

    // Set the starting tile for each thread.
    thread_data->start = i;

    if (i == cpi->num_workers - 1)
      winterface->execute(worker);
    else
      winterface->launch(worker);
  }

  // Encoding ends.
  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    winterface->sync(worker);
  }
}
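
// Tile-based multithreaded encode of one frame: one worker per tile column
// (capped by max_threads). Each worker's MACROBLOCK state, RD counts and
// frame counters are seeded from cpi before launch, and the per-thread
// counters are accumulated back once all workers have finished.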
void vp9_encode_tiles_mt(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int num_workers = VPXMIN(cpi->oxcf.max_threads, tile_cols);
  int i;

  vp9_init_tile_data(cpi);

  create_enc_workers(cpi, num_workers);

  for (i = 0; i < num_workers; i++) {
    EncWorkerData *thread_data;
    thread_data = &cpi->tile_thr_data[i];

    // Before encoding a frame, copy the thread data from cpi.
    if (thread_data->td != &cpi->td) {
      thread_data->td->mb = cpi->td.mb;
      thread_data->td->rd_counts = cpi->td.rd_counts;
    }
    if (thread_data->td->counts != &cpi->common.counts) {
      memcpy(thread_data->td->counts, &cpi->common.counts,
             sizeof(cpi->common.counts));
    }

    // Handle use_nonrd_pick_mode case.
    if (cpi->sf.use_nonrd_pick_mode) {
      MACROBLOCK *const x = &thread_data->td->mb;
      MACROBLOCKD *const xd = &x->e_mbd;
      struct macroblock_plane *const p = x->plane;
      struct macroblockd_plane *const pd = xd->plane;
      PICK_MODE_CONTEXT *ctx = &thread_data->td->pc_root->none;
      int j;

      for (j = 0; j < MAX_MB_PLANE; ++j) {
        p[j].coeff = ctx->coeff_pbuf[j][0];
        p[j].qcoeff = ctx->qcoeff_pbuf[j][0];
        pd[j].dqcoeff = ctx->dqcoeff_pbuf[j][0];
        p[j].eobs = ctx->eobs_pbuf[j][0];
      }
    }
  }

  launch_enc_workers(cpi, (VPxWorkerHook)enc_worker_hook, NULL, num_workers);

  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;

    // Accumulate counters.
    if (i < cpi->num_workers - 1) {
      vp9_accumulate_frame_counts(&cm->counts, thread_data->td->counts, 0);
      accumulate_rd_opt(&cpi->td, thread_data->td);
    }
  }
}
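
// Merges the first-pass statistics of tile_data_t into tile_data. Every
// field is a plain sum except image_data_start_row, which keeps the earliest
// valid row (see the comment at its computation below).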
static void accumulate_fp_tile_stat(TileDataEnc *tile_data,
                                    TileDataEnc *tile_data_t) {
  tile_data->fp_data.intra_factor += tile_data_t->fp_data.intra_factor;
  tile_data->fp_data.brightness_factor +=
      tile_data_t->fp_data.brightness_factor;
  tile_data->fp_data.coded_error += tile_data_t->fp_data.coded_error;
  tile_data->fp_data.sr_coded_error += tile_data_t->fp_data.sr_coded_error;
  tile_data->fp_data.frame_noise_energy +=
      tile_data_t->fp_data.frame_noise_energy;
  tile_data->fp_data.intra_error += tile_data_t->fp_data.intra_error;
  tile_data->fp_data.intercount += tile_data_t->fp_data.intercount;
  tile_data->fp_data.second_ref_count += tile_data_t->fp_data.second_ref_count;
  tile_data->fp_data.neutral_count += tile_data_t->fp_data.neutral_count;
  tile_data->fp_data.intra_count_low += tile_data_t->fp_data.intra_count_low;
  tile_data->fp_data.intra_count_high += tile_data_t->fp_data.intra_count_high;
  tile_data->fp_data.intra_skip_count += tile_data_t->fp_data.intra_skip_count;
  tile_data->fp_data.mvcount += tile_data_t->fp_data.mvcount;
  tile_data->fp_data.sum_mvr += tile_data_t->fp_data.sum_mvr;
  tile_data->fp_data.sum_mvr_abs += tile_data_t->fp_data.sum_mvr_abs;
  tile_data->fp_data.sum_mvc += tile_data_t->fp_data.sum_mvc;
  tile_data->fp_data.sum_mvc_abs += tile_data_t->fp_data.sum_mvc_abs;
  tile_data->fp_data.sum_mvrs += tile_data_t->fp_data.sum_mvrs;
  tile_data->fp_data.sum_mvcs += tile_data_t->fp_data.sum_mvcs;
  tile_data->fp_data.sum_in_vectors += tile_data_t->fp_data.sum_in_vectors;
  tile_data->fp_data.intra_smooth_count +=
      tile_data_t->fp_data.intra_smooth_count;
  // INVALID_ROW compares below any valid row index, so if the minimum is
  // INVALID_ROW at least one side is unset and the maximum (the valid row,
  // if any) is taken; otherwise the earlier of the two valid rows wins.
  tile_data->fp_data.image_data_start_row =
      VPXMIN(tile_data->fp_data.image_data_start_row,
             tile_data_t->fp_data.image_data_start_row) == INVALID_ROW
          ? VPXMAX(tile_data->fp_data.image_data_start_row,
                   tile_data_t->fp_data.image_data_start_row)
          : VPXMIN(tile_data->fp_data.image_data_start_row,
                   tile_data_t->fp_data.image_data_start_row);
}

// Allocate memory for row synchronization.
void vp9_row_mt_sync_mem_alloc(VP9RowMTSync *row_mt_sync, VP9_COMMON *cm,
                               int rows) {
  row_mt_sync->rows = rows;
#if CONFIG_MULTITHREAD
  {
    int i;

    CHECK_MEM_ERROR(cm, row_mt_sync->mutex_,
                    vpx_malloc(sizeof(*row_mt_sync->mutex_) * rows));
    if (row_mt_sync->mutex_) {
      for (i = 0; i < rows; ++i) {
        pthread_mutex_init(&row_mt_sync->mutex_[i], NULL);
      }
    }

    CHECK_MEM_ERROR(cm, row_mt_sync->cond_,
                    vpx_malloc(sizeof(*row_mt_sync->cond_) * rows));
    if (row_mt_sync->cond_) {
      for (i = 0; i < rows; ++i) {
        pthread_cond_init(&row_mt_sync->cond_[i], NULL);
      }
    }
  }
#endif  // CONFIG_MULTITHREAD

  CHECK_MEM_ERROR(cm, row_mt_sync->cur_col,
                  vpx_malloc(sizeof(*row_mt_sync->cur_col) * rows));

  // Set up nsync.
  row_mt_sync->sync_range = 1;
}

// Deallocate row-based multithreading synchronization mutexes and data.
void vp9_row_mt_sync_mem_dealloc(VP9RowMTSync *row_mt_sync) {
  if (row_mt_sync != NULL) {
#if CONFIG_MULTITHREAD
    int i;

    if (row_mt_sync->mutex_ != NULL) {
      for (i = 0; i < row_mt_sync->rows; ++i) {
        pthread_mutex_destroy(&row_mt_sync->mutex_[i]);
      }
      vpx_free(row_mt_sync->mutex_);
    }
    if (row_mt_sync->cond_ != NULL) {
      for (i = 0; i < row_mt_sync->rows; ++i) {
        pthread_cond_destroy(&row_mt_sync->cond_[i]);
      }
      vpx_free(row_mt_sync->cond_);
    }
#endif  // CONFIG_MULTITHREAD
    vpx_free(row_mt_sync->cur_col);
    // Clear the structure: this call may be triggered by a dynamic change in
    // the tile configuration, in which case it will be followed by an
    // _alloc() that may fail, and the freed pointers must not be left
    // dangling.
    vp9_zero(*row_mt_sync);
  }
}
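
// Called before encoding block column c of row r. For rows after the first,
// and only at sync_range-aligned columns, block until the row above has
// published progress of at least c + sync_range - 1 columns, so the
// above/above-right context this block depends on is available.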
void vp9_row_mt_sync_read(VP9RowMTSync *const row_mt_sync, int r, int c) {
#if CONFIG_MULTITHREAD
  const int nsync = row_mt_sync->sync_range;

  if (r && !(c & (nsync - 1))) {
    pthread_mutex_t *const mutex = &row_mt_sync->mutex_[r - 1];
    pthread_mutex_lock(mutex);

    while (c > row_mt_sync->cur_col[r - 1] - nsync + 1) {
      pthread_cond_wait(&row_mt_sync->cond_[r - 1], mutex);
    }
    pthread_mutex_unlock(mutex);
  }
#else
  (void)row_mt_sync;
  (void)r;
  (void)c;
#endif  // CONFIG_MULTITHREAD
}

// No-op variant of vp9_row_mt_sync_read() for paths that do not need row
// synchronization.
void vp9_row_mt_sync_read_dummy(VP9RowMTSync *const row_mt_sync, int r, int c) {
  (void)row_mt_sync;
  (void)r;
  (void)c;
  return;
}
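
// Called after encoding block column c of row r: publishes this row's
// progress and signals any thread waiting in vp9_row_mt_sync_read(). To
// limit signalling overhead, progress is only published once per sync_range
// columns; the final column publishes a position past the end of the row so
// readers of a finished row never block.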
void vp9_row_mt_sync_write(VP9RowMTSync *const row_mt_sync, int r, int c,
                           const int cols) {
#if CONFIG_MULTITHREAD
  const int nsync = row_mt_sync->sync_range;
  int cur;
  // Only signal when there are enough encoded blocks for the next row to run.
  int sig = 1;

  if (c < cols - 1) {
    cur = c;
    if (c % nsync != nsync - 1) sig = 0;
  } else {
    cur = cols + nsync;
  }

  if (sig) {
    pthread_mutex_lock(&row_mt_sync->mutex_[r]);

    row_mt_sync->cur_col[r] = cur;

    pthread_cond_signal(&row_mt_sync->cond_[r]);
    pthread_mutex_unlock(&row_mt_sync->mutex_[r]);
  }
#else
  (void)row_mt_sync;
  (void)r;
  (void)c;
  (void)cols;
#endif  // CONFIG_MULTITHREAD
}

// No-op variant of vp9_row_mt_sync_write() for paths that do not need row
// synchronization.
void vp9_row_mt_sync_write_dummy(VP9RowMTSync *const row_mt_sync, int r, int c,
                                 const int cols) {
  (void)row_mt_sync;
  (void)r;
  (void)c;
  (void)cols;
  return;
}
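
/* A minimal sketch of how the read/write pair above is intended to be used
 * by a row worker (illustrative only: encode_one_block() and the loop are
 * hypothetical stand-ins, not functions defined in this codebase):
 *
 *   void encode_row(VP9RowMTSync *sync, int r, int cols) {
 *     int c;
 *     for (c = 0; c < cols; ++c) {
 *       vp9_row_mt_sync_read(sync, r, c);         // wait on row r - 1
 *       encode_one_block(r, c);                   // hypothetical work item
 *       vp9_row_mt_sync_write(sync, r, c, cols);  // unblock row r + 1
 *     }
 *   }
 *
 * This yields the usual wavefront schedule: row r may start once row r - 1
 * is sync_range columns ahead, instead of waiting for it to finish.
 */

// Worker hook for first-pass row multithreading: repeatedly pops a
// (tile, mb_row) job from the per-tile job queues and runs first-pass stats
// collection for that macroblock row, until every tile reports completion.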
static int first_pass_worker_hook(EncWorkerData *const thread_data,
                                  MultiThreadHandle *multi_thread_ctxt) {
  VP9_COMP *const cpi = thread_data->cpi;
  const VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  int tile_row, tile_col;
  TileDataEnc *this_tile;
  int end_of_frame;
  int thread_id = thread_data->thread_id;
  int cur_tile_id = multi_thread_ctxt->thread_id_to_tile_id[thread_id];
  JobNode *proc_job = NULL;
  FIRSTPASS_DATA fp_acc_data;
  MV zero_mv = { 0, 0 };
  MV best_ref_mv;
  int mb_row;

  end_of_frame = 0;
  while (0 == end_of_frame) {
    // Get the next job in the queue.
    proc_job =
        (JobNode *)vp9_enc_grp_get_next_job(multi_thread_ctxt, cur_tile_id);
    if (NULL == proc_job) {
      // Query for the status of other tiles.
      end_of_frame = vp9_get_tiles_proc_status(
          multi_thread_ctxt, thread_data->tile_completion_status, &cur_tile_id,
          tile_cols);
    } else {
      tile_col = proc_job->tile_col_id;
      tile_row = proc_job->tile_row_id;

      this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
      mb_row = proc_job->vert_unit_row_num;

      best_ref_mv = zero_mv;
      vp9_zero(fp_acc_data);
      fp_acc_data.image_data_start_row = INVALID_ROW;
      vp9_first_pass_encode_tile_mb_row(cpi, thread_data->td, &fp_acc_data,
                                        this_tile, &best_ref_mv, mb_row);
    }
  }
  return 0;
}
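
// Row-multithreaded first pass: (re)allocates row-MT buffers if the tile
// configuration grew, builds the FIRST_PASS_JOB queue, runs the workers,
// then folds the per-tile first-pass stats into the first tile column.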
void vp9_encode_fp_row_mt(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int tile_rows = 1 << cm->log2_tile_rows;
  MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
  TileDataEnc *first_tile_col;
  int num_workers = VPXMAX(cpi->oxcf.max_threads, 1);
  int i;

  if (multi_thread_ctxt->allocated_tile_cols < tile_cols ||
      multi_thread_ctxt->allocated_tile_rows < tile_rows ||
      multi_thread_ctxt->allocated_vert_unit_rows < cm->mb_rows) {
    vp9_row_mt_mem_dealloc(cpi);
    vp9_init_tile_data(cpi);
    vp9_row_mt_mem_alloc(cpi);
  } else {
    vp9_init_tile_data(cpi);
  }

  create_enc_workers(cpi, num_workers);

  vp9_assign_tile_to_thread(multi_thread_ctxt, tile_cols, cpi->num_workers);

  vp9_prepare_job_queue(cpi, FIRST_PASS_JOB);

  vp9_multi_thread_tile_init(cpi);

  for (i = 0; i < num_workers; i++) {
    EncWorkerData *thread_data;
    thread_data = &cpi->tile_thr_data[i];

    // Before encoding a frame, copy the thread data from cpi.
    if (thread_data->td != &cpi->td) {
      thread_data->td->mb = cpi->td.mb;
    }
  }

  launch_enc_workers(cpi, (VPxWorkerHook)first_pass_worker_hook,
                     multi_thread_ctxt, num_workers);

  first_tile_col = &cpi->tile_data[0];
  for (i = 1; i < tile_cols; i++) {
    TileDataEnc *this_tile = &cpi->tile_data[i];
    accumulate_fp_tile_stat(first_tile_col, this_tile);
  }
}
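
// Worker hook for row-multithreaded temporal filtering (ARNR): pops
// (tile, mb_row) jobs and filters that macroblock row over the tile's
// macroblock column range.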
static int temporal_filter_worker_hook(EncWorkerData *const thread_data,
                                       MultiThreadHandle *multi_thread_ctxt) {
  VP9_COMP *const cpi = thread_data->cpi;
  const VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  int tile_row, tile_col;
  int mb_col_start, mb_col_end;
  TileDataEnc *this_tile;
  int end_of_frame;
  int thread_id = thread_data->thread_id;
  int cur_tile_id = multi_thread_ctxt->thread_id_to_tile_id[thread_id];
  JobNode *proc_job = NULL;
  int mb_row;

  end_of_frame = 0;
  while (0 == end_of_frame) {
    // Get the next job in the queue.
    proc_job =
        (JobNode *)vp9_enc_grp_get_next_job(multi_thread_ctxt, cur_tile_id);
    if (NULL == proc_job) {
      // Query for the status of other tiles.
      end_of_frame = vp9_get_tiles_proc_status(
          multi_thread_ctxt, thread_data->tile_completion_status, &cur_tile_id,
          tile_cols);
    } else {
      tile_col = proc_job->tile_col_id;
      tile_row = proc_job->tile_row_id;
      this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
      // Convert the tile's MI column range to MB units (2 MIs per MB).
      mb_col_start = (this_tile->tile_info.mi_col_start) >> 1;
      mb_col_end = (this_tile->tile_info.mi_col_end + 1) >> 1;
      mb_row = proc_job->vert_unit_row_num;

      vp9_temporal_filter_iterate_row_c(cpi, thread_data->td, mb_row,
                                        mb_col_start, mb_col_end);
    }
  }
  return 0;
}
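
// Row-multithreaded ARNR temporal filtering: reuses the existing worker pool
// if one was already created (falling back to a single worker otherwise),
// builds the ARNR_JOB queue, and runs the temporal filter hook.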
void vp9_temporal_filter_row_mt(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int tile_rows = 1 << cm->log2_tile_rows;
  MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
  int num_workers = cpi->num_workers ? cpi->num_workers : 1;
  int i;

  if (multi_thread_ctxt->allocated_tile_cols < tile_cols ||
      multi_thread_ctxt->allocated_tile_rows < tile_rows ||
      multi_thread_ctxt->allocated_vert_unit_rows < cm->mb_rows) {
    vp9_row_mt_mem_dealloc(cpi);
    vp9_init_tile_data(cpi);
    vp9_row_mt_mem_alloc(cpi);
  } else {
    vp9_init_tile_data(cpi);
  }

  create_enc_workers(cpi, num_workers);

  vp9_assign_tile_to_thread(multi_thread_ctxt, tile_cols, cpi->num_workers);

  vp9_prepare_job_queue(cpi, ARNR_JOB);

  for (i = 0; i < num_workers; i++) {
    EncWorkerData *thread_data;
    thread_data = &cpi->tile_thr_data[i];

    // Before encoding a frame, copy the thread data from cpi.
    if (thread_data->td != &cpi->td) {
      thread_data->td->mb = cpi->td.mb;
    }
  }

  launch_enc_workers(cpi, (VPxWorkerHook)temporal_filter_worker_hook,
                     multi_thread_ctxt, num_workers);
}
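
// Worker hook for row-multithreaded encoding: pops (tile, superblock row)
// jobs and encodes that superblock row, until every tile is done.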
static int enc_row_mt_worker_hook(EncWorkerData *const thread_data,
                                  MultiThreadHandle *multi_thread_ctxt) {
  VP9_COMP *const cpi = thread_data->cpi;
  const VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  int tile_row, tile_col;
  int end_of_frame;
  int thread_id = thread_data->thread_id;
  int cur_tile_id = multi_thread_ctxt->thread_id_to_tile_id[thread_id];
  JobNode *proc_job = NULL;
  int mi_row;

  end_of_frame = 0;
  while (0 == end_of_frame) {
    // Get the next job in the queue.
    proc_job =
        (JobNode *)vp9_enc_grp_get_next_job(multi_thread_ctxt, cur_tile_id);
    if (NULL == proc_job) {
      // Query for the status of other tiles.
      end_of_frame = vp9_get_tiles_proc_status(
          multi_thread_ctxt, thread_data->tile_completion_status, &cur_tile_id,
          tile_cols);
    } else {
      tile_col = proc_job->tile_col_id;
      tile_row = proc_job->tile_row_id;
      // Convert the job's superblock row index to MI units.
      mi_row = proc_job->vert_unit_row_num * MI_BLOCK_SIZE;

      vp9_encode_sb_row(cpi, thread_data->td, tile_row, tile_col, mi_row);
    }
  }
  return 0;
}
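
// Row-multithreaded encoding entry point: mirrors vp9_encode_tiles_mt() but
// schedules work per superblock row through the job queue, so multiple
// threads can cooperate within a single tile.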
void vp9_encode_tiles_row_mt(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int tile_rows = 1 << cm->log2_tile_rows;
  MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
  int num_workers = VPXMAX(cpi->oxcf.max_threads, 1);
  int i;

  if (multi_thread_ctxt->allocated_tile_cols < tile_cols ||
      multi_thread_ctxt->allocated_tile_rows < tile_rows ||
      multi_thread_ctxt->allocated_vert_unit_rows < cm->mb_rows) {
    vp9_row_mt_mem_dealloc(cpi);
    vp9_init_tile_data(cpi);
    vp9_row_mt_mem_alloc(cpi);
  } else {
    vp9_init_tile_data(cpi);
  }

  create_enc_workers(cpi, num_workers);

  vp9_assign_tile_to_thread(multi_thread_ctxt, tile_cols, cpi->num_workers);

  vp9_prepare_job_queue(cpi, ENCODE_JOB);

  vp9_multi_thread_tile_init(cpi);

  for (i = 0; i < num_workers; i++) {
    EncWorkerData *thread_data;
    thread_data = &cpi->tile_thr_data[i];
    // Before encoding a frame, copy the thread data from cpi.
    if (thread_data->td != &cpi->td) {
      thread_data->td->mb = cpi->td.mb;
      thread_data->td->rd_counts = cpi->td.rd_counts;
    }
    if (thread_data->td->counts != &cpi->common.counts) {
      memcpy(thread_data->td->counts, &cpi->common.counts,
             sizeof(cpi->common.counts));
    }

    // Handle use_nonrd_pick_mode case.
    if (cpi->sf.use_nonrd_pick_mode) {
      MACROBLOCK *const x = &thread_data->td->mb;
      MACROBLOCKD *const xd = &x->e_mbd;
      struct macroblock_plane *const p = x->plane;
      struct macroblockd_plane *const pd = xd->plane;
      PICK_MODE_CONTEXT *ctx = &thread_data->td->pc_root->none;
      int j;

      for (j = 0; j < MAX_MB_PLANE; ++j) {
        p[j].coeff = ctx->coeff_pbuf[j][0];
        p[j].qcoeff = ctx->qcoeff_pbuf[j][0];
        pd[j].dqcoeff = ctx->dqcoeff_pbuf[j][0];
        p[j].eobs = ctx->eobs_pbuf[j][0];
      }
    }
  }

  launch_enc_workers(cpi, (VPxWorkerHook)enc_row_mt_worker_hook,
                     multi_thread_ctxt, num_workers);

  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;

    // Accumulate counters.
    if (i < cpi->num_workers - 1) {
      vp9_accumulate_frame_counts(&cm->counts, thread_data->td->counts, 0);
      accumulate_rd_opt(&cpi->td, thread_data->td);
    }
  }
}