1 /*
2 * Copyright (c) 2017 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <assert.h>
12
13 #include "vp9/encoder/vp9_encoder.h"
14 #include "vp9/encoder/vp9_ethread.h"
15 #include "vp9/encoder/vp9_multi_thread.h"
16 #include "vp9/encoder/vp9_temporal_filter.h"
17
// Pops the next pending job from the job queue of tile column |tile_id|.
//
// The queue head and the acquired-job counter are updated while holding the
// tile's job mutex (when CONFIG_MULTITHREAD is enabled).
//
// Returns a pointer to the JobNode describing the job, or NULL when the
// queue of that tile column has no more jobs.
void *vp9_enc_grp_get_next_job(MultiThreadHandle *multi_thread_ctxt,
                               int tile_id) {
  RowMTInfo *const tile_mt_info = &multi_thread_ctxt->row_mt_info[tile_id];
  JobQueueHandle *const queue_hdl = &tile_mt_info->job_queue_hdl;
  JobQueue *head;
  JobNode *job = NULL;

#if CONFIG_MULTITHREAD
  // Serialize access to the queue head.
  pthread_mutex_lock(&tile_mt_info->job_mutex);
#endif

  head = (JobQueue *)queue_hdl->next;
  if (head != NULL) {
    // Detach the head node and account for it.
    queue_hdl->next = head->next;
    queue_hdl->num_jobs_acquired++;
    job = &head->job_info;
  }

#if CONFIG_MULTITHREAD
  pthread_mutex_unlock(&tile_mt_info->job_mutex);
#endif

  return job;
}
53
// Allocates the per-row RD threshold frequency factor table of |this_tile|
// and fills every entry with RD_THRESH_INIT_FACT.
//
// The table holds BLOCK_SIZES * MAX_MODES entries for each of
// (aligned mi_rows >> MI_BLOCK_SIZE_LOG2) + 1 rows. Allocation failure is
// reported through CHECK_MEM_ERROR on |cpi->common|.
void vp9_row_mt_alloc_rd_thresh(VP9_COMP *const cpi,
                                TileDataEnc *const this_tile) {
  VP9_COMMON *const cm = &cpi->common;
  const int num_rows =
      (mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2) + 1;
  const int num_entries = num_rows * BLOCK_SIZES * MAX_MODES;
  int idx;

  CHECK_MEM_ERROR(
      cm, this_tile->row_base_thresh_freq_fact,
      (int *)vpx_calloc(num_entries,
                        sizeof(*(this_tile->row_base_thresh_freq_fact))));

  // calloc() zero-fills; overwrite with the initial factor.
  for (idx = 0; idx < num_entries; ++idx) {
    this_tile->row_base_thresh_freq_fact[idx] = RD_THRESH_INIT_FACT;
  }
}
68
// Allocates the memory used by row-based multi-threading:
//   - one job queue large enough for every row_mt stage,
//   - one job mutex per tile column (CONFIG_MULTITHREAD only),
//   - the row synchronization data of every tile column,
//   - the per-row RD threshold tables when
//     cpi->sf.adaptive_rd_thresh_row_mt is set.
// The allocated dimensions are recorded in cpi->multi_thread_ctxt.
void vp9_row_mt_mem_alloc(VP9_COMP *cpi) {
  struct VP9Common *cm = &cpi->common;
  MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int tile_rows = 1 << cm->log2_tile_rows;
  const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
  // Size the queue for the stage with the most rows. First pass uses 16x16
  // block size, hence the comparison against mb_rows.
  const int max_rows_per_tile_col = VPXMAX(cm->mb_rows, sb_rows);
  const int queue_len = max_rows_per_tile_col * tile_cols;
  int row, col;

  // Remember what is being allocated.
  multi_thread_ctxt->allocated_tile_cols = tile_cols;
  multi_thread_ctxt->allocated_tile_rows = tile_rows;
  multi_thread_ctxt->allocated_vert_unit_rows = max_rows_per_tile_col;

  CHECK_MEM_ERROR(cm, multi_thread_ctxt->job_queue,
                  (JobQueue *)vpx_memalign(32, queue_len * sizeof(JobQueue)));

#if CONFIG_MULTITHREAD
  // One job mutex per tile column.
  for (col = 0; col < tile_cols; ++col) {
    pthread_mutex_init(&multi_thread_ctxt->row_mt_info[col].job_mutex, NULL);
  }
#endif

  // Row synchronization data (and optional RD threshold tables) for the
  // tiles of the first tile row.
  for (col = 0; col < tile_cols; ++col) {
    TileDataEnc *const tile = &cpi->tile_data[col];

    vp9_row_mt_sync_mem_alloc(&tile->row_mt_sync, cm, max_rows_per_tile_col);

    if (!cpi->sf.adaptive_rd_thresh_row_mt) continue;

    // Drop any previous table before allocating a fresh one.
    if (tile->row_base_thresh_freq_fact != NULL) {
      vpx_free(tile->row_base_thresh_freq_fact);
      tile->row_base_thresh_freq_fact = NULL;
    }
    vp9_row_mt_alloc_rd_thresh(cpi, tile);
  }

  // Tiles in the remaining tile rows reuse the sync data of the tile in the
  // same column of tile row zero (the struct is copied by value).
  for (row = 1; row < tile_rows; ++row) {
    TileDataEnc *const row_start = &cpi->tile_data[row * tile_cols];
    for (col = 0; col < tile_cols; ++col) {
      row_start[col].row_mt_sync = cpi->tile_data[col].row_mt_sync;
    }
  }

  // Number of vertical units in each tile row, taken from the first tile of
  // that row.
  for (row = 0; row < tile_rows; ++row) {
    TileInfo *const first_tile_info =
        &cpi->tile_data[row * tile_cols].tile_info;
    multi_thread_ctxt->num_tile_vert_sbs[row] =
        get_num_vert_units(*first_tile_info, MI_BLOCK_SIZE_LOG2);
  }
}
129
// Releases everything set up by vp9_row_mt_mem_alloc(): the job queue, the
// per-tile-column job mutexes (CONFIG_MULTITHREAD only), the row
// synchronization data and the per-row RD threshold tables.
//
// On return the job queue pointer is NULL and the recorded allocation
// dimensions are zero, so calling this function again is a no-op and no
// stale pointer or already-destroyed mutex is touched.
void vp9_row_mt_mem_dealloc(VP9_COMP *cpi) {
  MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
  int tile_col;
#if CONFIG_MULTITHREAD
  int tile_row;
#endif

  // Deallocate memory for job queue. Clear the pointer so it can neither be
  // freed twice nor used after being released.
  if (multi_thread_ctxt->job_queue != NULL) {
    vpx_free(multi_thread_ctxt->job_queue);
    multi_thread_ctxt->job_queue = NULL;
  }

#if CONFIG_MULTITHREAD
  // Destroy mutex for each tile column that was set up.
  for (tile_col = 0; tile_col < multi_thread_ctxt->allocated_tile_cols;
       tile_col++) {
    pthread_mutex_destroy(&multi_thread_ctxt->row_mt_info[tile_col].job_mutex);
  }
#endif

  // Free row based multi-threading sync memory
  for (tile_col = 0; tile_col < multi_thread_ctxt->allocated_tile_cols;
       tile_col++) {
    TileDataEnc *this_tile = &cpi->tile_data[tile_col];
    vp9_row_mt_sync_mem_dealloc(&this_tile->row_mt_sync);
  }

#if CONFIG_MULTITHREAD
  // Free the per-row RD threshold tables of every allocated tile.
  // NOTE(review): vp9_row_mt_mem_alloc() allocates these tables regardless of
  // CONFIG_MULTITHREAD, while they are only released under it - confirm
  // against the tile data initialization whether this guard is intended.
  for (tile_row = 0; tile_row < multi_thread_ctxt->allocated_tile_rows;
       tile_row++) {
    for (tile_col = 0; tile_col < multi_thread_ctxt->allocated_tile_cols;
         tile_col++) {
      TileDataEnc *this_tile =
          &cpi->tile_data[tile_row * multi_thread_ctxt->allocated_tile_cols +
                          tile_col];
      if (this_tile->row_base_thresh_freq_fact != NULL) {
        vpx_free(this_tile->row_base_thresh_freq_fact);
        this_tile->row_base_thresh_freq_fact = NULL;
      }
    }
  }
#endif

  // Nothing is allocated any more; a later call must not destroy the mutexes
  // or release the sync memory a second time.
  multi_thread_ctxt->allocated_tile_cols = 0;
  multi_thread_ctxt->allocated_tile_rows = 0;
  multi_thread_ctxt->allocated_vert_unit_rows = 0;
}
172
// Resets the per-tile-column state used by row-based multi-threading.
//
// For every tile column of the first tile row, the row synchronization
// column markers are set to -1, the first pass data is zeroed and its
// image_data_start_row is marked INVALID_ROW. The number of rows reset is
// cm->mb_rows when cpi->oxcf.pass == 1, otherwise the superblock row count.
void vp9_multi_thread_tile_init(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const int num_tile_cols = 1 << cm->log2_tile_cols;
  const int num_sb_rows =
      mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
  const int rows_to_reset = (cpi->oxcf.pass == 1) ? cm->mb_rows : num_sb_rows;
  int col;

  for (col = 0; col < num_tile_cols; ++col) {
    TileDataEnc *const tile = &cpi->tile_data[col];

    // A byte-wise fill with -1 sets every cur_col entry to -1.
    memset(tile->row_mt_sync.cur_col, -1,
           sizeof(*tile->row_mt_sync.cur_col) * rows_to_reset);

    vp9_zero(tile->fp_data);
    tile->fp_data.image_data_start_row = INVALID_ROW;
  }
}
190
// Maps each of the |num_workers| worker threads to a tile column.
//
// Workers are handed tile columns 0, 1, 2, ... in order; once the counter
// reaches |tile_cols| it wraps back to column 0. The mapping is stored in
// multi_thread_ctxt->thread_id_to_tile_id.
void vp9_assign_tile_to_thread(MultiThreadHandle *multi_thread_ctxt,
                               int tile_cols, int num_workers) {
  int next_tile = 0;
  int worker;

  for (worker = 0; worker < num_workers; ++worker) {
    multi_thread_ctxt->thread_id_to_tile_id[worker] = next_tile;

    // Advance to the following tile column, wrapping after the last one.
    ++next_tile;
    if (next_tile == tile_cols) next_tile = 0;
  }
}
202
// Returns how many jobs of tile column |cur_tile_id| have not been acquired
// yet, i.e. jobs_per_tile_col minus the tile's acquired-job counter.
//
// The counter is read while holding the tile's job mutex (when
// CONFIG_MULTITHREAD is enabled).
int vp9_get_job_queue_status(MultiThreadHandle *multi_thread_ctxt,
                             int cur_tile_id) {
  RowMTInfo *const tile_mt_info =
      &multi_thread_ctxt->row_mt_info[cur_tile_id];
  JobQueueHandle *const queue_hdl = &tile_mt_info->job_queue_hdl;
  int remaining;

#if CONFIG_MULTITHREAD
  pthread_mutex_lock(&tile_mt_info->job_mutex);
#endif

  remaining =
      multi_thread_ctxt->jobs_per_tile_col - queue_hdl->num_jobs_acquired;

#if CONFIG_MULTITHREAD
  pthread_mutex_unlock(&tile_mt_info->job_mutex);
#endif

  return remaining;
}
229
// Builds the per-tile-column job lists for the given |job_type| and resets
// the per-worker bookkeeping.
//
// The shared job queue buffer is split into one contiguous slice of
// jobs_per_tile_col entries per tile column. Each slice is linked into a
// NULL-terminated singly linked list whose head is stored in the tile's job
// queue handle. The number of jobs per tile column depends on |job_type|:
//   ENCODE_JOB      - number of superblock rows,
//   FIRST_PASS_JOB  - cm->mb_rows,
//   ARNR_JOB        - (cm->mi_rows + TF_ROUND) >> TF_SHIFT.
// For ENCODE_JOB the tile row id of each job is derived from
// multi_thread_ctxt->num_tile_vert_sbs; for the other job types it stays 0.
//
// When a tile column has no jobs (e.g. an unknown |job_type| in a build
// where assert() is compiled out), its queue head is set to NULL and no
// queue entry is written.
void vp9_prepare_job_queue(VP9_COMP *cpi, JOB_TYPE job_type) {
  VP9_COMMON *const cm = &cpi->common;
  MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
  JobQueue *job_queue = multi_thread_ctxt->job_queue;
  const int tile_cols = 1 << cm->log2_tile_cols;
  int job_row_num, jobs_per_tile, jobs_per_tile_col = 0, total_jobs;
  const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
  int tile_col, i;

  switch (job_type) {
    case ENCODE_JOB: jobs_per_tile_col = sb_rows; break;
    case FIRST_PASS_JOB: jobs_per_tile_col = cm->mb_rows; break;
    case ARNR_JOB:
      jobs_per_tile_col = ((cm->mi_rows + TF_ROUND) >> TF_SHIFT);
      break;
    default: assert(0); break;
  }

  total_jobs = jobs_per_tile_col * tile_cols;

  multi_thread_ctxt->jobs_per_tile_col = jobs_per_tile_col;
  // memset the entire job queue buffer to zero
  memset(job_queue, 0, total_jobs * sizeof(JobQueue));

  // Job queue preparation
  for (tile_col = 0; tile_col < tile_cols; tile_col++) {
    RowMTInfo *tile_ctxt = &multi_thread_ctxt->row_mt_info[tile_col];
    int tile_row = 0;

    // An empty queue must expose a NULL head; otherwise the head would point
    // at an entry that does not belong to this tile column.
    tile_ctxt->job_queue_hdl.next =
        (jobs_per_tile_col > 0) ? (void *)job_queue : (void *)NULL;
    tile_ctxt->job_queue_hdl.num_jobs_acquired = 0;

    // loop over all the vertical rows
    for (job_row_num = 0, jobs_per_tile = 0; job_row_num < jobs_per_tile_col;
         job_row_num++, jobs_per_tile++) {
      JobQueue *job_queue_curr = &job_queue[job_row_num];

      job_queue_curr->job_info.vert_unit_row_num = job_row_num;
      job_queue_curr->job_info.tile_col_id = tile_col;
      job_queue_curr->job_info.tile_row_id = tile_row;

      // Link to the following entry; the last entry terminates the list.
      job_queue_curr->next = (job_row_num + 1 < jobs_per_tile_col)
                                 ? (void *)(job_queue_curr + 1)
                                 : (void *)NULL;

      if (ENCODE_JOB == job_type) {
        // Once all vertical units of the current tile row have been queued,
        // move on to the next tile row and restart the per-tile count (the
        // loop increment brings it back to 0).
        if (jobs_per_tile >=
            multi_thread_ctxt->num_tile_vert_sbs[tile_row] - 1) {
          tile_row++;
          jobs_per_tile = -1;
        }
      }
    }

    // Move to the next tile
    job_queue += jobs_per_tile_col;
  }

  // Reset the per-worker state: thread id and tile completion flags.
  for (i = 0; i < cpi->num_workers; i++) {
    EncWorkerData *thread_data = &cpi->tile_thr_data[i];

    thread_data->thread_id = i;

    for (tile_col = 0; tile_col < tile_cols; tile_col++)
      thread_data->tile_completion_status[tile_col] = 0;
  }
}
301
// Picks the next tile column a worker should switch to.
//
// The tile column currently referenced by |*cur_tile_id| is marked as
// completed in |tile_completion_status|. Every tile column not yet marked is
// then queried for its number of remaining jobs; columns with none left are
// marked as completed as well.
//
// Returns 1 when no tile column has jobs remaining (|*cur_tile_id| is left
// untouched). Otherwise stores the tile column with the most remaining jobs
// in |*cur_tile_id| and returns 0.
int vp9_get_tiles_proc_status(MultiThreadHandle *multi_thread_ctxt,
                              int *tile_completion_status, int *cur_tile_id,
                              int tile_cols) {
  int busiest_tile = -1;  // Tile column with the most jobs left, if any.
  int most_remaining = 0;
  int col;

  // The current tile is done; flag it so the scan below skips it.
  tile_completion_status[*cur_tile_id] = 1;

  for (col = 0; col < tile_cols; ++col) {
    int remaining;

    if (tile_completion_status[col] != 0) continue;

    remaining = vp9_get_job_queue_status(multi_thread_ctxt, col);

    // Flag drained tiles so later scans do not query them again.
    if (remaining == 0) tile_completion_status[col] = 1;

    if (remaining > most_remaining) {
      most_remaining = remaining;
      busiest_tile = col;
    }
  }

  if (busiest_tile == -1) return 1;

  // Continue with the least processed tile.
  *cur_tile_id = busiest_tile;
  return 0;
}
335