• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #ifndef AOM_AV1_COMMON_THREAD_COMMON_H_
13 #define AOM_AV1_COMMON_THREAD_COMMON_H_
14 
15 #include "config/aom_config.h"
16 
17 #include "av1/common/av1_loopfilter.h"
18 #include "av1/common/cdef.h"
19 #include "aom_util/aom_thread.h"
20 
21 #ifdef __cplusplus
22 extern "C" {
23 #endif
24 
25 struct AV1Common;
26 
// Description of a single loop-filter job as stored in the job queue by
// enqueue_lf_jobs(): one plane and one filtering pass for the unit row that
// starts at mi_row.
typedef struct AV1LfMTInfo {
  int mi_row;         // First mi row of the unit row this job covers.
  int plane;          // Plane index to filter (0, 1 or 2).
  int dir;            // Filtering pass, 0 or 1; all dir-0 jobs are queued first.
  int lpf_opt_level;  // Optimization level, copied from the enqueuing caller.
} AV1LfMTInfo;
33 
34 // Loopfilter row synchronization
35 typedef struct AV1LfSyncData {
36 #if CONFIG_MULTITHREAD
37   pthread_mutex_t *mutex_[MAX_MB_PLANE];
38   pthread_cond_t *cond_[MAX_MB_PLANE];
39 #endif
40   // Allocate memory to store the loop-filtered superblock index in each row.
41   int *cur_sb_col[MAX_MB_PLANE];
42   // The optimal sync_range for different resolution and platform should be
43   // determined by testing. Currently, it is chosen to be a power-of-2 number.
44   int sync_range;
45   int rows;
46 
47   // Row-based parallel loopfilter data
48   LFWorkerData *lfdata;
49   int num_workers;
50 
51 #if CONFIG_MULTITHREAD
52   pthread_mutex_t *job_mutex;
53 #endif
54   AV1LfMTInfo *job_queue;
55   int jobs_enqueued;
56   int jobs_dequeued;
57 } AV1LfSync;
58 
// Description of a single loop-restoration job (element type of
// AV1LrSync::job_queue).
// NOTE(review): the field meanings below are inferred from the names only;
// the code that fills and consumes them is not part of this header.
typedef struct AV1LrMTInfo {
  int v_start;       // presumably first row of the job's vertical range
  int v_end;         // presumably end row of the job's vertical range
  int lr_unit_row;   // presumably index of the restoration-unit row
  int plane;         // plane index the job applies to
  int sync_mode;     // synchronization mode selector -- TODO confirm values
  int v_copy_start;  // presumably first row of the range to copy back
  int v_copy_end;    // presumably end row of the range to copy back
} AV1LrMTInfo;
68 
// Per-worker data for multi-threaded loop restoration. The two context
// pointers are kept opaque (void *) in this header.
typedef struct LoopRestorationWorkerData {
  int32_t *rst_tmpbuf;   // Temporary buffer used by the worker.
  void *rlbs;            // Opaque pointer; concrete type not visible here.
  void *lr_ctxt;         // Opaque loop-restoration context.
  int do_extend_border;  // Flag; presumably requests border extension.
} LRWorkerData;
75 
76 // Looprestoration row synchronization
77 typedef struct AV1LrSyncData {
78 #if CONFIG_MULTITHREAD
79   pthread_mutex_t *mutex_[MAX_MB_PLANE];
80   pthread_cond_t *cond_[MAX_MB_PLANE];
81 #endif
82   // Allocate memory to store the loop-restoration block index in each row.
83   int *cur_sb_col[MAX_MB_PLANE];
84   // The optimal sync_range for different resolution and platform should be
85   // determined by testing. Currently, it is chosen to be a power-of-2 number.
86   int sync_range;
87   int rows;
88   int num_planes;
89 
90   int num_workers;
91 
92 #if CONFIG_MULTITHREAD
93   pthread_mutex_t *job_mutex;
94 #endif
95   // Row-based parallel loopfilter data
96   LRWorkerData *lrworkerdata;
97 
98   AV1LrMTInfo *job_queue;
99   int jobs_enqueued;
100   int jobs_dequeued;
101 } AV1LrSync;
102 
103 typedef struct AV1CdefWorker {
104   AV1_COMMON *cm;
105   MACROBLOCKD *xd;
106   uint16_t *colbuf[MAX_MB_PLANE];
107   uint16_t *srcbuf;
108   uint16_t *linebuf[MAX_MB_PLANE];
109   cdef_init_fb_row_t cdef_init_fb_row_fn;
110   int do_extend_border;
111 } AV1CdefWorkerData;
112 
// Per-row synchronization record for multi-threaded CDEF.
typedef struct AV1CdefRowSync {
#if CONFIG_MULTITHREAD
  // Mutex / condition variable belonging to this row.
  pthread_mutex_t *row_mutex_;
  pthread_cond_t *row_cond_;
#endif  // CONFIG_MULTITHREAD
  // Completion flag for the row.
  int is_row_done;
} AV1CdefRowSync;
120 
121 // Data related to CDEF search multi-thread synchronization.
122 typedef struct AV1CdefSyncData {
123 #if CONFIG_MULTITHREAD
124   // Mutex lock used while dispatching jobs.
125   pthread_mutex_t *mutex_;
126 #endif  // CONFIG_MULTITHREAD
127   // Data related to CDEF row mt sync information
128   AV1CdefRowSync *cdef_row_mt;
129   // Flag to indicate all blocks are processed and end of frame is reached
130   int end_of_frame;
131   // Row index in units of 64x64 block
132   int fbr;
133   // Column index in units of 64x64 block
134   int fbc;
135 } AV1CdefSync;
136 
137 void av1_cdef_frame_mt(AV1_COMMON *const cm, MACROBLOCKD *const xd,
138                        AV1CdefWorkerData *const cdef_worker,
139                        AVxWorker *const workers, AV1CdefSync *const cdef_sync,
140                        int num_workers, cdef_init_fb_row_t cdef_init_fb_row_fn,
141                        int do_extend_border);
142 void av1_cdef_init_fb_row_mt(const AV1_COMMON *const cm,
143                              const MACROBLOCKD *const xd,
144                              CdefBlockInfo *const fb_info,
145                              uint16_t **const linebuf, uint16_t *const src,
146                              struct AV1CdefSyncData *const cdef_sync, int fbr);
147 void av1_cdef_copy_sb8_16(const AV1_COMMON *const cm, uint16_t *const dst,
148                           int dstride, const uint8_t *src, int src_voffset,
149                           int src_hoffset, int sstride, int vsize, int hsize);
150 void av1_cdef_copy_sb8_16_lowbd(uint16_t *const dst, int dstride,
151                                 const uint8_t *src, int src_voffset,
152                                 int src_hoffset, int sstride, int vsize,
153                                 int hsize);
154 void av1_cdef_copy_sb8_16_highbd(uint16_t *const dst, int dstride,
155                                  const uint8_t *src, int src_voffset,
156                                  int src_hoffset, int sstride, int vsize,
157                                  int hsize);
158 void av1_alloc_cdef_sync(AV1_COMMON *const cm, AV1CdefSync *cdef_sync,
159                          int num_workers);
160 void av1_free_cdef_sync(AV1CdefSync *cdef_sync);
161 
162 // Deallocate loopfilter synchronization related mutex and data.
163 void av1_loop_filter_dealloc(AV1LfSync *lf_sync);
164 void av1_loop_filter_alloc(AV1LfSync *lf_sync, AV1_COMMON *cm, int rows,
165                            int width, int num_workers);
166 
167 void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, struct AV1Common *cm,
168                               struct macroblockd *xd, int plane_start,
169                               int plane_end, int partial_frame,
170                               AVxWorker *workers, int num_workers,
171                               AV1LfSync *lf_sync, int lpf_opt_level);
172 
173 void av1_loop_restoration_filter_frame_mt(YV12_BUFFER_CONFIG *frame,
174                                           struct AV1Common *cm,
175                                           int optimized_lr, AVxWorker *workers,
176                                           int num_workers, AV1LrSync *lr_sync,
177                                           void *lr_ctxt, int do_extend_border);
178 void av1_loop_restoration_dealloc(AV1LrSync *lr_sync, int num_workers);
179 void av1_loop_restoration_alloc(AV1LrSync *lr_sync, AV1_COMMON *cm,
180                                 int num_workers, int num_rows_lr,
181                                 int num_planes, int width);
182 int av1_get_intrabc_extra_top_right_sb_delay(const AV1_COMMON *cm);
183 
184 void av1_thread_loop_filter_rows(
185     const YV12_BUFFER_CONFIG *const frame_buffer, AV1_COMMON *const cm,
186     struct macroblockd_plane *planes, MACROBLOCKD *xd, int mi_row, int plane,
187     int dir, int lpf_opt_level, AV1LfSync *const lf_sync,
188     AV1_DEBLOCKING_PARAMETERS *params_buf, TX_SIZE *tx_buf, int mib_size_log2);
189 
skip_loop_filter_plane(const int planes_to_lf[3],int plane,int lpf_opt_level)190 static AOM_FORCE_INLINE bool skip_loop_filter_plane(const int planes_to_lf[3],
191                                                     int plane,
192                                                     int lpf_opt_level) {
193   // If LPF_PICK_METHOD is LPF_PICK_FROM_Q, we have the option to filter both
194   // chroma planes together
195   if (lpf_opt_level == 2) {
196     if (plane == AOM_PLANE_Y) {
197       return !planes_to_lf[plane];
198     }
199     if (plane == AOM_PLANE_U) {
200       // U and V are handled together
201       return !planes_to_lf[1] && !planes_to_lf[2];
202     }
203     assert(plane == AOM_PLANE_V);
204     if (plane == AOM_PLANE_V) {
205       // V is handled when u is filtered
206       return true;
207     }
208   }
209 
210   // Normal operation mode
211   return !planes_to_lf[plane];
212 }
213 
enqueue_lf_jobs(AV1LfSync * lf_sync,int start,int stop,const int planes_to_lf[3],int lpf_opt_level,int num_mis_in_lpf_unit_height)214 static AOM_INLINE void enqueue_lf_jobs(AV1LfSync *lf_sync, int start, int stop,
215                                        const int planes_to_lf[3],
216                                        int lpf_opt_level,
217                                        int num_mis_in_lpf_unit_height) {
218   int mi_row, plane, dir;
219   AV1LfMTInfo *lf_job_queue = lf_sync->job_queue;
220   lf_sync->jobs_enqueued = 0;
221   lf_sync->jobs_dequeued = 0;
222 
223   // Launch all vertical jobs first, as they are blocking the horizontal ones.
224   // Launch top row jobs for all planes first, in case the output can be
225   // partially reconstructed row by row.
226   for (dir = 0; dir < 2; ++dir) {
227     for (mi_row = start; mi_row < stop; mi_row += num_mis_in_lpf_unit_height) {
228       for (plane = 0; plane < 3; ++plane) {
229         if (skip_loop_filter_plane(planes_to_lf, plane, lpf_opt_level)) {
230           continue;
231         }
232         if (!planes_to_lf[plane]) continue;
233         lf_job_queue->mi_row = mi_row;
234         lf_job_queue->plane = plane;
235         lf_job_queue->dir = dir;
236         lf_job_queue->lpf_opt_level = lpf_opt_level;
237         lf_job_queue++;
238         lf_sync->jobs_enqueued++;
239       }
240     }
241   }
242 }
243 
loop_filter_frame_mt_init(AV1_COMMON * cm,int start_mi_row,int end_mi_row,const int planes_to_lf[3],int num_workers,AV1LfSync * lf_sync,int lpf_opt_level,int num_mis_in_lpf_unit_height_log2)244 static AOM_INLINE void loop_filter_frame_mt_init(
245     AV1_COMMON *cm, int start_mi_row, int end_mi_row, const int planes_to_lf[3],
246     int num_workers, AV1LfSync *lf_sync, int lpf_opt_level,
247     int num_mis_in_lpf_unit_height_log2) {
248   // Number of superblock rows
249   const int sb_rows =
250       CEIL_POWER_OF_TWO(cm->mi_params.mi_rows, num_mis_in_lpf_unit_height_log2);
251 
252   if (!lf_sync->sync_range || sb_rows != lf_sync->rows ||
253       num_workers > lf_sync->num_workers) {
254     av1_loop_filter_dealloc(lf_sync);
255     av1_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers);
256   }
257 
258   // Initialize cur_sb_col to -1 for all SB rows.
259   for (int i = 0; i < MAX_MB_PLANE; i++) {
260     memset(lf_sync->cur_sb_col[i], -1,
261            sizeof(*(lf_sync->cur_sb_col[i])) * sb_rows);
262   }
263 
264   enqueue_lf_jobs(lf_sync, start_mi_row, end_mi_row, planes_to_lf,
265                   lpf_opt_level, (1 << num_mis_in_lpf_unit_height_log2));
266 }
267 
get_lf_job_info(AV1LfSync * lf_sync)268 static AOM_INLINE AV1LfMTInfo *get_lf_job_info(AV1LfSync *lf_sync) {
269   AV1LfMTInfo *cur_job_info = NULL;
270 
271 #if CONFIG_MULTITHREAD
272   pthread_mutex_lock(lf_sync->job_mutex);
273 
274   if (lf_sync->jobs_dequeued < lf_sync->jobs_enqueued) {
275     cur_job_info = lf_sync->job_queue + lf_sync->jobs_dequeued;
276     lf_sync->jobs_dequeued++;
277   }
278 
279   pthread_mutex_unlock(lf_sync->job_mutex);
280 #else
281   (void)lf_sync;
282 #endif
283 
284   return cur_job_info;
285 }
286 
loop_filter_data_reset(LFWorkerData * lf_data,YV12_BUFFER_CONFIG * frame_buffer,struct AV1Common * cm,MACROBLOCKD * xd)287 static AOM_INLINE void loop_filter_data_reset(LFWorkerData *lf_data,
288                                               YV12_BUFFER_CONFIG *frame_buffer,
289                                               struct AV1Common *cm,
290                                               MACROBLOCKD *xd) {
291   struct macroblockd_plane *pd = xd->plane;
292   lf_data->frame_buffer = frame_buffer;
293   lf_data->cm = cm;
294   lf_data->xd = xd;
295   for (int i = 0; i < MAX_MB_PLANE; i++) {
296     memcpy(&lf_data->planes[i].dst, &pd[i].dst, sizeof(lf_data->planes[i].dst));
297     lf_data->planes[i].subsampling_x = pd[i].subsampling_x;
298     lf_data->planes[i].subsampling_y = pd[i].subsampling_y;
299   }
300 }
301 
check_planes_to_loop_filter(const struct loopfilter * lf,int * planes_to_lf,int plane_start,int plane_end)302 static AOM_INLINE int check_planes_to_loop_filter(const struct loopfilter *lf,
303                                                   int *planes_to_lf,
304                                                   int plane_start,
305                                                   int plane_end) {
306   // For each luma and chroma plane, whether to filter it or not.
307   planes_to_lf[0] = (lf->filter_level[0] || lf->filter_level[1]) &&
308                     plane_start <= 0 && 0 < plane_end;
309   planes_to_lf[1] = lf->filter_level_u && plane_start <= 1 && 1 < plane_end;
310   planes_to_lf[2] = lf->filter_level_v && plane_start <= 2 && 2 < plane_end;
311   // If the luma plane is purposely not filtered, neither are the chroma
312   // planes.
313   if (!planes_to_lf[0] && plane_start <= 0 && 0 < plane_end) return 0;
314   // Early exit.
315   if (!planes_to_lf[0] && !planes_to_lf[1] && !planes_to_lf[2]) return 0;
316   return 1;
317 }
318 
319 #ifdef __cplusplus
320 }  // extern "C"
321 #endif
322 
323 #endif  // AOM_AV1_COMMON_THREAD_COMMON_H_
324