• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2022 Samsung Electronics Co., Ltd.
3  * All Rights Reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * - Redistributions of source code must retain the above copyright notice,
9  *   this list of conditions and the following disclaimer.
10  *
11  * - Redistributions in binary form must reproduce the above copyright notice,
12  *   this list of conditions and the following disclaimer in the documentation
13  *   and/or other materials provided with the distribution.
14  *
15  * - Neither the name of the copyright owner, nor the names of its contributors
16  *   may be used to endorse or promote products derived from this software
17  *   without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include "oapv_def.h"
33 
/*
 * Copy a rectangle of component 'c' out of 'imgb' into the packed buffer
 * 'block', one row at a time.
 *
 * Each copied row is (w_l * byte-depth) bytes long and h_l rows are copied.
 * For c != 0 the start position derived from (x_l, y_l) is scaled down by the
 * chroma shifts of the image color format; for c == 0 no shift is applied.
 */
static void imgb_to_block(oapv_imgb_t *imgb, int c, int x_l, int y_l, int w_l, int h_l, s16 *block)
{
    const int bd = OAPV_CS_GET_BYTE_DEPTH(imgb->cs);
    const int row_bytes = (w_l)*bd;
    int       shift_x = 0;
    int       shift_y = 0;

    if(c != 0) {
        u8 cfi = color_format_to_chroma_format_idc(OAPV_CS_GET_FORMAT(imgb->cs));
        shift_x = get_chroma_sft_w(cfi);
        shift_y = get_chroma_sft_h(cfi);
    }

    // first byte of the requested rectangle inside plane 'c'
    u8 *in = ((u8 *)imgb->a[c]) + ((y_l >> shift_y) * imgb->s[c]) + ((x_l * bd) >> shift_x);
    u8 *out = (u8 *)block;

    for(int row = 0; row < h_l; row++) {
        oapv_mcpy(out, in, row_bytes);
        in += imgb->s[c]; // next source row (plane stride)
        out += row_bytes; // destination rows are packed back to back
    }
}
59 
imgb_to_block_10bit(void * src,int blk_w,int blk_h,int s_src,int offset_src,int s_dst,void * dst)60 static void imgb_to_block_10bit(void *src, int blk_w, int blk_h, int s_src, int offset_src, int s_dst, void *dst)
61 {
62     const int mid_val = (1 << (10 - 1));
63     s16      *s = (s16 *)src;
64     s16      *d = (s16 *)dst;
65 
66     for(int h = 0; h < blk_h; h++) {
67         for(int w = 0; w < blk_w; w++) {
68             d[w] = s[w] - mid_val;
69         }
70         s = (s16 *)(((u8 *)s) + s_src);
71         d = (s16 *)(((u8 *)d) + s_dst);
72     }
73 }
74 
imgb_to_block_p210_y(void * src,int blk_w,int blk_h,int s_src,int offset_src,int s_dst,void * dst)75 static void imgb_to_block_p210_y(void *src, int blk_w, int blk_h, int s_src, int offset_src, int s_dst, void *dst)
76 {
77     const int mid_val = (1 << (10 - 1));
78     u16      *s = (s16 *)src;
79     s16      *d = (s16 *)dst;
80 
81     for(int h = 0; h < blk_h; h++) {
82         for(int w = 0; w < blk_w; w++) {
83             d[w] = (s16)(s[w] >> 6) - mid_val;
84         }
85         s = (u16 *)(((u8 *)s) + s_src);
86         d = (s16 *)(((u8 *)d) + s_dst);
87     }
88 }
89 
imgb_to_block_p210_uv(void * src,int blk_w,int blk_h,int s_src,int offset_src,int s_dst,void * dst)90 static void imgb_to_block_p210_uv(void *src, int blk_w, int blk_h, int s_src, int offset_src, int s_dst, void *dst)
91 {
92     const int mid_val = (1 << (10 - 1));
93     u16      *s = (u16 *)src + offset_src;
94     s16      *d = (s16 *)dst;
95 
96     for(int h = 0; h < blk_h; h++) {
97         for(int w = 0; w < blk_w; w++) {
98             d[w] = (s16)(s[w * 2] >> 6) - mid_val;
99         }
100         s = (u16 *)(((u8 *)s) + s_src);
101         d = (s16 *)(((u8 *)d) + s_dst);
102     }
103 }
104 
/*
 * Read a w_l x h_l block of component 'c' from 'imgb' into 'block', shifting
 * every 16-bit sample right by 6 bits.
 *
 * Component 0 is read from plane 0 with consecutive samples. Any other
 * component is read from plane 1 with a step of two samples, starting at
 * sample offset (c >> 1) within each pair.
 */
static void imgb_to_block_p210(oapv_imgb_t *imgb, int c, int x_l, int y_l, int w_l, int h_l, s16 *block)
{
    const int bd = OAPV_CS_GET_BYTE_DEPTH(imgb->cs);
    int       shift_x = 0;
    int       shift_y = 0;
    int       step = 1;  // distance between consecutive samples of this component
    int       plane = c; // plane index actually read

    if(c != 0) {
        u8 cfi = color_format_to_chroma_format_idc(OAPV_CS_GET_FORMAT(imgb->cs));
        shift_x = get_chroma_sft_w(cfi);
        shift_y = get_chroma_sft_h(cfi);
        step = 2;
        plane = 1;
    }

    // stride converted from bytes to 16-bit samples when byte depth is above one
    const int stride = imgb->s[plane] >> (bd > 1 ? 1 : 0);

    u16 *in = ((u16 *)imgb->a[plane]) + ((y_l >> shift_y) * stride) + ((x_l * step) >> shift_x);
    u16 *out = (u16 *)block;

    for(int row = 0; row < h_l; row++) {
        for(int col = 0; col < w_l; col++) {
            out[col] = (in[col * step + (c >> 1)] >> 6);
        }
        in += stride;
        out += w_l;
    }
}
136 
block_to_imgb_10bit(void * src,int blk_w,int blk_h,int s_src,int offset_dst,int s_dst,void * dst)137 static void block_to_imgb_10bit(void *src, int blk_w, int blk_h, int s_src, int offset_dst, int s_dst, void *dst)
138 {
139     const int max_val = (1 << 10) - 1;
140     const int mid_val = (1 << (10 - 1));
141     s16      *s = (s16 *)src;
142     u16      *d = (u16 *)dst;
143 
144     for(int h = 0; h < blk_h; h++) {
145         for(int w = 0; w < blk_w; w++) {
146             d[w] = oapv_clip3(0, max_val, s[w] + mid_val);
147         }
148         s = (s16 *)(((u8 *)s) + s_src);
149         d = (u16 *)(((u8 *)d) + s_dst);
150     }
151 }
152 
block_to_imgb_p210_y(void * src,int blk_w,int blk_h,int s_src,int offset_dst,int s_dst,void * dst)153 static void block_to_imgb_p210_y(void *src, int blk_w, int blk_h, int s_src, int offset_dst, int s_dst, void *dst)
154 {
155     const int max_val = (1 << 10) - 1;
156     const int mid_val = (1 << (10 - 1));
157     s16      *s = (s16 *)src;
158     u16      *d = (u16 *)dst;
159 
160     for(int h = 0; h < blk_h; h++) {
161         for(int w = 0; w < blk_w; w++) {
162             d[w] = oapv_clip3(0, max_val, s[w] + mid_val) << 6;
163         }
164         s = (s16 *)(((u8 *)s) + s_src);
165         d = (u16 *)(((u8 *)d) + s_dst);
166     }
167 }
168 
block_to_imgb_p210_uv(void * src,int blk_w,int blk_h,int s_src,int x_pel,int s_dst,void * dst)169 static void block_to_imgb_p210_uv(void *src, int blk_w, int blk_h, int s_src, int x_pel, int s_dst, void *dst)
170 {
171     const int max_val = (1 << 10) - 1;
172     const int mid_val = (1 << (10 - 1));
173     s16      *s = (s16 *)src;
174 
175     // x_pel is x-offset value from left boundary of picture in unit of pixel.
176     // the 'dst' address has calculated by
177     // dst = (s16*)((u8*)origin + y_pel*s_dst) + x_pel;
178     // in case of P210 color format,
179     // since 's_dst' is byte size of stride including all U and V pixel values,
180     // y-offset calculation is correct.
181     // however, the adding only x_pel is not enough to address the correct pixel
182     // position of U or V because U & V use the same buffer plane
183     // in interleaved way,
184     // so, the 'dst' address should be increased by 'x_pel' to address pixel
185     // position correctly.
186     u16      *d = (u16 *)dst + x_pel; // p210 pixel value needs 0~65535 range
187 
188     for(int h = 0; h < blk_h; h++) {
189         for(int w = 0; w < blk_w; w++) {
190             d[w * 2] = ((u16)oapv_clip3(0, max_val, s[w] + mid_val)) << 6;
191         }
192         s = (s16 *)(((u8 *)s) + s_src);
193         d = (u16 *)(((u8 *)d) + s_dst);
194     }
195 }
196 
/*
 * Add the mid value (1 << (bit_depth - 1)) to each of the b_w * b_h entries
 * of 'coef' and clip every result to [0, (1 << bit_depth) - 1].
 */
static void plus_mid_val(s16 *coef, int b_w, int b_h, int bit_depth)
{
    const int offset = 1 << (bit_depth - 1);
    const int count = b_h * b_w;

    for(int k = 0; k < count; k++) {
        coef[k] = oapv_clip3(0, (1 << bit_depth) - 1, coef[k] + offset);
    }
}
204 
/*
 * Fill 'finfo' from the frame information 'fi' together with the
 * caller-supplied 'pbu_type' and 'group_id'.
 */
static void copy_fi_to_finfo(oapv_fi_t *fi, int pbu_type, int group_id, oapv_frm_info_t *finfo)
{
    /* values handed in by the caller */
    finfo->pbu_type = pbu_type;
    finfo->group_id = group_id;

    /* picture geometry; casting to 'int' would be fine here */
    finfo->w = (int)fi->frame_width;
    finfo->h = (int)fi->frame_height;

    /* color space built from chroma format and bit depth */
    finfo->cs = OAPV_CS_SET(chroma_format_idc_to_color_format(fi->chroma_format_idc), fi->bit_depth, 0);
    finfo->chroma_format_idc = fi->chroma_format_idc;
    finfo->bit_depth = fi->bit_depth;

    /* profile / level / band */
    finfo->profile_idc = fi->profile_idc;
    finfo->level_idc = fi->level_idc;
    finfo->band_idc = fi->band_idc;

    finfo->capture_time_distance = fi->capture_time_distance;
}
219 
/*
 * Fill 'finfo' from the frame header 'fh': the embedded frame information,
 * the quantization matrices (flattened from 2-D to 1-D per component) and
 * the color description fields.
 */
static void copy_fh_to_finfo(oapv_fh_t *fh, int pbu_type, int group_id, oapv_frm_info_t *finfo)
{
    const int col_mask = (1 << OAPV_LOG2_BLK) - 1;

    copy_fi_to_finfo(&fh->fi, pbu_type, group_id, finfo);

    finfo->use_q_matrix = fh->use_q_matrix;
    for(int comp = 0; comp < OAPV_MAX_CC; comp++) {
        for(int idx = 0; idx < OAPV_BLK_D; idx++) {
            // split the flat index into row (upper bits) and column (lower bits)
            const int row = idx >> OAPV_LOG2_BLK;
            const int col = idx & col_mask;
            finfo->q_matrix[comp][idx] = fh->q_matrix[comp][row][col];
        }
    }

    finfo->color_description_present_flag = fh->color_description_present_flag;
    finfo->color_primaries = fh->color_primaries;
    finfo->transfer_characteristics = fh->transfer_characteristics;
    finfo->matrix_coefficients = fh->matrix_coefficients;
    finfo->full_range_flag = fh->full_range_flag;
}
236 
237 ///////////////////////////////////////////////////////////////////////////////
238 // start of encoder code
239 #if ENABLE_ENCODER
240 ///////////////////////////////////////////////////////////////////////////////
241 
enc_id_to_ctx(oapve_t id)242 static oapve_ctx_t *enc_id_to_ctx(oapve_t id)
243 {
244     oapve_ctx_t *ctx;
245     oapv_assert_rv(id, NULL);
246     ctx = (oapve_ctx_t *)id;
247     oapv_assert_rv((ctx)->magic == OAPVE_MAGIC_CODE, NULL);
248     return ctx;
249 }
250 
enc_ctx_alloc(void)251 static oapve_ctx_t *enc_ctx_alloc(void)
252 {
253     oapve_ctx_t *ctx;
254     ctx = (oapve_ctx_t *)oapv_malloc_fast(sizeof(oapve_ctx_t));
255     oapv_assert_rv(ctx, NULL);
256     oapv_mset_x64a(ctx, 0, sizeof(oapve_ctx_t));
257     return ctx;
258 }
259 
/*
 * Release an encoder context through oapv_mfree_fast().
 */
static void enc_ctx_free(oapve_ctx_t *ctx)
{
    oapv_mfree_fast(ctx);
}
264 
enc_core_alloc()265 static oapve_core_t *enc_core_alloc()
266 {
267     oapve_core_t *core;
268     core = (oapve_core_t *)oapv_malloc_fast(sizeof(oapve_core_t));
269 
270     oapv_assert_rv(core, NULL);
271     oapv_mset_x64a(core, 0, sizeof(oapve_core_t));
272 
273     return core;
274 }
275 
/*
 * Release an encoder core through oapv_mfree_fast().
 */
static void enc_core_free(oapve_core_t *core)
{
    oapv_mfree_fast(core);
}
280 
/*
 * Bind 'core' to its owning context and to the tile it will process.
 * 'thread_idx' is accepted but not used. Always yields OAPV_OK.
 */
static int enc_core_init(oapve_core_t *core, oapve_ctx_t *ctx, int tile_idx, int thread_idx)
{
    core->ctx = ctx;
    core->tile_idx = tile_idx;

    return OAPV_OK;
}
287 
/*
 * Subtract the mid value (1 << (bit_depth - 1)) from each of the
 * w_blk * h_blk entries of 'coef'.
 */
static void enc_minus_mid_val(s16 *coef, int w_blk, int h_blk, int bit_depth)
{
    const int offset = 1 << (bit_depth - 1);
    const int count = h_blk * w_blk;

    for(int k = 0; k < count; k++) {
        coef[k] -= offset;
    }
}
295 
/*
 * Split a w_pel x h_pel picture into tiles of tile_w x tile_h pixels.
 *
 * The number of tile columns / rows (rounded up) and their product are
 * written to the three output pointers. Entry i of 'ti' receives the
 * position and size of tile i in raster order; tiles on the right or bottom
 * edge are shortened so they end at the picture boundary.
 * Always yields OAPV_OK.
 */
static int enc_set_tile_info(oapve_tile_t *ti, int w_pel, int h_pel, int tile_w,
                             int tile_h, int *num_tile_cols, int *num_tile_rows, int *num_tiles)
{
    *num_tile_cols = (w_pel + (tile_w - 1)) / tile_w;
    *num_tile_rows = (h_pel + (tile_h - 1)) / tile_h;
    *num_tiles = (*num_tile_cols) * (*num_tile_rows);

    for(int idx = 0; idx < *num_tiles; idx++) {
        oapve_tile_t *cur = &ti[idx];
        const int     left = (idx % (*num_tile_cols)) * tile_w;
        const int     top = (idx / (*num_tile_cols)) * tile_h;

        cur->x = left;
        cur->y = top;

        // clamp the tile size at the picture boundary
        if(left + tile_w > w_pel) {
            cur->w = w_pel - left;
        }
        else {
            cur->w = tile_w;
        }

        if(top + tile_h > h_pel) {
            cur->h = h_pel - top;
        }
        else {
            cur->h = tile_h;
        }
    }

    return OAPV_OK;
}
313 
/*
 * Encode one block of component 'c' without any search: transform and
 * quantize core->coef in place, update the DC prediction state and, when
 * ctx->rec is set, build the reconstruction in core->coef_rec.
 * Always yields 0.
 */
static double enc_block(oapve_ctx_t *ctx, oapve_core_t *core, int log2_w, int log2_h, int c)
{
    const int bit_depth = ctx->bit_depth;
    s16      *coef = core->coef;

    /* forward transform followed by quantization, both in place */
    oapv_trans(ctx, coef, log2_w, log2_h, bit_depth);
    ctx->fn_quant[0](coef, core->qp[c], core->q_mat_enc[c], log2_w, log2_h, bit_depth, c ? 128 : 212);

    /* the DC value is stored as the difference to the previous DC of this component */
    core->dc_diff = coef[0] - core->prev_dc[c];
    core->prev_dc[c] = coef[0];

    if(ctx->rec) {
        /* dequantize and inverse transform a copy to obtain the reconstruction */
        s16 *rec = core->coef_rec;
        oapv_mcpy(rec, coef, sizeof(s16) * OAPV_BLK_D);
        ctx->fn_dquant[0](rec, core->q_mat_dec[c], log2_w, log2_h, core->dq_shift[c]);
        ctx->fn_itx[0](rec, ITX_SHIFT1, ITX_SHIFT2(bit_depth), 1 << log2_w);
    }

    return 0;
}
332 
/*
 * Encode one block of component 'c' with a per-coefficient refinement search.
 *
 * The block in core->coef is transformed and quantized; then each quantized
 * coefficient (visited in scan order) is tried with small offsets, and an
 * offset is kept whenever it lowers the SSD between the source block and the
 * dequantized / inverse-transformed reconstruction.
 * Component 0 runs 2 passes with up to 12 candidates per coefficient, other
 * components run 1 pass with up to 4 candidates.
 *
 * On exit core->coef holds the chosen coefficients, core->coef_rec the
 * matching reconstruction (only when ctx->rec is set) and the DC prediction
 * state is updated. Yields the best SSD found.
 */
static double enc_block_rdo_slow(oapve_ctx_t *ctx, oapve_core_t *core, int log2_w, int log2_h, int c)
{
    ALIGNED_16(s16 recon[OAPV_BLK_D]) = { 0 };
    ALIGNED_16(s16 coeff[OAPV_BLK_D]) = { 0 };
    s16        org[OAPV_BLK_D] = { 0 };
    // candidate offsets, alternating sign with growing magnitude
    const int  delta_tbl[15] = { 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7 };
    const u16 *scan = oapv_tbl_scan;
    const int  width = 1 << log2_w;
    const int  height = 1 << log2_h;
    const int  bit_depth = ctx->bit_depth;
    const int  qp = core->qp[c];
    const int  num_pass = (c == 0) ? 2 : 1;
    const int  full_range = (c == 0) ? 13 : 5;
    s16       *out_coef = core->coef;
    s16       *out_rec = core->coef_rec;
    int        best_cost = INT_MAX;
    int        done = 0; // set once a reconstruction with zero distortion is found

    // keep the source samples, transform in place and quantize a working copy
    oapv_mcpy(org, core->coef, sizeof(s16) * OAPV_BLK_D);
    oapv_trans(ctx, core->coef, log2_w, log2_h, bit_depth);
    oapv_mcpy(coeff, core->coef, sizeof(s16) * OAPV_BLK_D);
    ctx->fn_quant[0](coeff, qp, core->q_mat_enc[c], log2_w, log2_h, bit_depth, c ? 112 : 212);

    // baseline: cost of the plain quantization result
    oapv_mcpy(recon, coeff, sizeof(s16) * OAPV_BLK_D);
    ctx->fn_dquant[0](recon, core->q_mat_dec[c], log2_w, log2_h, core->dq_shift[c]);
    ctx->fn_itx[0](recon, ITX_SHIFT1, ITX_SHIFT2(bit_depth), 1 << log2_w);
    best_cost = (int)ctx->fn_ssd[0](width, height, org, recon, width, width);
    oapv_mcpy(out_coef, coeff, sizeof(s16) * OAPV_BLK_D);
    if(ctx->rec) {
        oapv_mcpy(out_rec, recon, sizeof(s16) * OAPV_BLK_D);
    }
    if(best_cost == 0) {
        done = 1;
    }

    for(int pass = 0; pass < num_pass && !done; pass++) {
        for(int j = 0; j < OAPV_BLK_D && !done; j++) {
            const int pos = scan[j];
            const s16 base = coeff[pos];
            int       range = full_range;
            int       best_idx = 0;

            if(base == 0) {
                // zero coefficients are only revisited for component 0 at positions 0..2
                if(c != 0 || pos >= 3) {
                    continue;
                }
                range = 3;
            }

            for(int i = 1; i < range && !done; i++) {
                // beyond +/-1, only keep walking in the direction (table parity)
                // that has already produced an improvement
                if(i > 2 && (best_idx == 0 || (best_idx % 2) != (i % 2))) {
                    continue;
                }

                const s16 cand = base + delta_tbl[i];
                coeff[pos] = cand;

                oapv_mcpy(recon, coeff, sizeof(s16) * OAPV_BLK_D);
                ctx->fn_dquant[0](recon, core->q_mat_dec[c], log2_w, log2_h, core->dq_shift[c]);
                ctx->fn_itx[0](recon, ITX_SHIFT1, ITX_SHIFT2(bit_depth), 1 << log2_w);
                const int cost = (int)ctx->fn_ssd[0](width, height, org, recon, width, width);

                if(cost >= best_cost) {
                    // no gain: restore the best value found so far for this position
                    coeff[pos] = base + delta_tbl[best_idx];
                    continue;
                }

                best_cost = cost;
                out_coef[pos] = cand;
                if(ctx->rec) {
                    oapv_mcpy(out_rec, recon, sizeof(s16) * OAPV_BLK_D);
                }
                best_idx = i;
                if(cost == 0) {
                    done = 1;
                }
            }
        }
    }

    core->dc_diff = out_coef[0] - core->prev_dc[c];
    core->prev_dc[c] = out_coef[0];

    return best_cost;
}
427 
/*
 * Encode one block of component 'c' with a per-coefficient refinement search
 * that avoids a full inverse transform per candidate.
 *
 * The block in core->coef is transformed and quantized. The initial
 * reconstruction is kept in 'rec_ups' as produced by oapv_itx_get_wo_sft();
 * for every candidate offset ctx->fn_itx_adj[0] derives 'rec_tmp' from
 * 'rec_ups', which is then rounded into 'recon' and compared with the source
 * block by SSD. Component 0 runs 2 passes with up to 12 candidates per
 * coefficient, other components run 1 pass with up to 4 candidates.
 *
 * On exit core->coef holds the chosen coefficients, core->coef_rec is rebuilt
 * from them when ctx->rec is set, and the DC prediction state is updated.
 * Yields the best SSD found.
 */
static double enc_block_rdo_medium(oapve_ctx_t *ctx, oapve_core_t *core, int log2_w, int log2_h, int c)
{
    ALIGNED_16(s16 org[OAPV_BLK_D]);
    ALIGNED_16(s16 recon[OAPV_BLK_D]);
    ALIGNED_16(s16 coeff[OAPV_BLK_D]);
    ALIGNED_16(s16 tmp_buf[OAPV_BLK_D]);

    ALIGNED_32(int rec_ups[OAPV_BLK_D]);
    ALIGNED_32(int rec_tmp[OAPV_BLK_D]);

    int        blk_w = 1 << log2_w;
    int        blk_h = 1 << log2_h;
    int        bit_depth = ctx->bit_depth;
    int        qp = core->qp[c];

    s16       *best_coeff = core->coef;
    s16       *best_recon = core->coef_rec;

    int        best_cost = INT_MAX;
    int        zero_dist = 0; // set once a reconstruction with zero distortion is found
    const u16 *scanp = oapv_tbl_scan;
    // candidate offsets, alternating sign with growing magnitude
    const int  map_idx_diff[15] = { 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7 };

    // keep the source samples, transform in place and quantize a working copy
    oapv_mcpy(org, core->coef, sizeof(s16) * OAPV_BLK_D);
    oapv_trans(ctx, core->coef, log2_w, log2_h, bit_depth);
    oapv_mcpy(coeff, core->coef, sizeof(s16) * OAPV_BLK_D);

    ctx->fn_quant[0](coeff, qp, core->q_mat_enc[c], log2_w, log2_h, bit_depth, c ? 112 : 212);

    {
        // baseline: cost of the plain quantization result; 'rec_ups' is filled here
        oapv_mcpy(recon, coeff, sizeof(s16) * OAPV_BLK_D);
        ctx->fn_dquant[0](recon, core->q_mat_dec[c], log2_w, log2_h, core->dq_shift[c]);
        ctx->fn_itx_part[0](recon, tmp_buf, ITX_SHIFT1, 1 << log2_w);
        oapv_itx_get_wo_sft(tmp_buf, recon, rec_ups, ITX_SHIFT2(bit_depth), 1 << log2_h);

        int cost = (int)ctx->fn_ssd[0](blk_w, blk_h, org, recon, blk_w, blk_w);
        oapv_mcpy(best_coeff, coeff, sizeof(s16) * OAPV_BLK_D);
        if(ctx->rec) {
            oapv_mcpy(best_recon, recon, sizeof(s16) * OAPV_BLK_D);
        }
        if(cost == 0) {
            zero_dist = 1;
        }
        best_cost = cost;
    }

    for(int itr = 0; itr < (c == 0 ? 2 : 1) && !zero_dist; itr++) {
        for(int j = 0; j < OAPV_BLK_D && !zero_dist; j++) {
            int best_idx = 0;
            s16 org_coef = coeff[scanp[j]];
            int adj_rng = (c == 0 ? 13 : 5);
            if(org_coef == 0) {
                // zero coefficients are only revisited for component 0 at positions 0..2
                if(c == 0 && scanp[j] < 3) {
                    adj_rng = 3;
                }
                else {
                    continue;
                }
            }
            // dequantization step of this coefficient, with dq_shift applied
            int q_step = 0;
            if(core->dq_shift[c] > 0) {
                q_step = (core->q_mat_dec[c][scanp[j]] + (1 << (core->dq_shift[c] - 1))) >> core->dq_shift[c];
            }
            else {
                q_step = (core->q_mat_dec[c][scanp[j]]) << (-core->dq_shift[c]);
            }

            for(int i = 1; i < adj_rng && !zero_dist; i++) {
                // beyond +/-1, only keep walking in the direction (table parity)
                // that has already produced an improvement
                if(i > 2) {
                    if(best_idx == 0) {
                        continue;
                    }
                    else if(best_idx % 2 == 1 && i % 2 == 0) {
                        continue;
                    }
                    else if(best_idx % 2 == 0 && i % 2 == 1) {
                        continue;
                    }
                }

                s16 test_coef = org_coef + map_idx_diff[i];
                coeff[scanp[j]] = test_coef;
                // NOTE(review): step_diff is relative to org_coef, while rec_ups
                // may already include an accepted offset for this position -
                // confirm this matches what fn_itx_adj expects.
                int step_diff = q_step * map_idx_diff[i];
                ctx->fn_itx_adj[0](rec_ups, rec_tmp, j, step_diff, 9);
                // round over the whole block; the buffers are sized OAPV_BLK_D
                for(int k = 0; k < OAPV_BLK_D; k++) {
                    recon[k] = (rec_tmp[k] + 512) >> 10;
                }

                int cost = (int)ctx->fn_ssd[0](blk_w, blk_h, org, recon, blk_w, blk_w);
                if(cost < best_cost) {
                    // accept: the adjusted reconstruction becomes the new base
                    oapv_mcpy(rec_ups, rec_tmp, sizeof(int) * OAPV_BLK_D);
                    best_cost = cost;
                    best_coeff[scanp[j]] = test_coef;
                    best_idx = i;
                    if(cost == 0) {
                        zero_dist = 1;
                    }
                }
                else {
                    // no gain: restore the best value found so far for this position
                    coeff[scanp[j]] = org_coef + map_idx_diff[best_idx];
                }
            }
        }
    }

    if(ctx->rec) {
        // rebuild the reconstruction from the final coefficients
        oapv_mcpy(best_recon, best_coeff, sizeof(s16) * OAPV_BLK_D);
        ctx->fn_dquant[0](best_recon, core->q_mat_dec[c], log2_w, log2_h, core->dq_shift[c]);
        ctx->fn_itx[0](best_recon, ITX_SHIFT1, ITX_SHIFT2(bit_depth), 1 << log2_w);
    }

    core->dc_diff = best_coeff[0] - core->prev_dc[c];
    core->prev_dc[c] = best_coeff[0];

    return best_cost;
}
544 
/*
 * Encode one block of component 'c' with the most exhaustive per-coefficient
 * refinement search of this file.
 *
 * The block in core->coef is transformed and quantized; then each quantized
 * coefficient (visited in scan order) is tried with small offsets, and an
 * offset is kept whenever it lowers the SSD between the source block and the
 * dequantized / inverse-transformed reconstruction.
 * Component 0 runs 7 passes with up to 14 candidates per coefficient, other
 * components run 3 passes with up to 4 candidates.
 *
 * On exit core->coef holds the chosen coefficients, core->coef_rec the
 * matching reconstruction (only when ctx->rec is set) and the DC prediction
 * state is updated. Yields the best SSD found.
 */
static double enc_block_rdo_placebo(oapve_ctx_t *ctx, oapve_core_t *core, int log2_w, int log2_h, int c)
{
    ALIGNED_16(s16 org[OAPV_BLK_D]);
    ALIGNED_16(s16 recon[OAPV_BLK_D]);
    ALIGNED_16(s16 coeff[OAPV_BLK_D]);
    // candidate offsets, alternating sign with growing magnitude
    const int  delta_tbl[15] = { 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7 };
    const u16 *scan = oapv_tbl_scan;
    const int  width = 1 << log2_w;
    const int  height = 1 << log2_h;
    const int  bit_depth = ctx->bit_depth;
    const int  qp = core->qp[c];
    const int  num_pass = (c == 0) ? 7 : 3;
    const int  full_range = (c == 0) ? 15 : 5;
    s16       *out_coef = core->coef;
    s16       *out_rec = core->coef_rec;
    int        best_cost = INT_MAX;
    int        done = 0; // set once a reconstruction with zero distortion is found

    // keep the source samples, transform in place and quantize a working copy
    oapv_mcpy(org, core->coef, sizeof(s16) * OAPV_BLK_D);
    oapv_trans(ctx, core->coef, log2_w, log2_h, bit_depth);
    oapv_mcpy(coeff, core->coef, sizeof(s16) * OAPV_BLK_D);
    ctx->fn_quant[0](coeff, qp, core->q_mat_enc[c], log2_w, log2_h, bit_depth, c ? 112 : 212);

    // baseline: cost of the plain quantization result
    oapv_mcpy(recon, coeff, sizeof(s16) * OAPV_BLK_D);
    ctx->fn_dquant[0](recon, core->q_mat_dec[c], log2_w, log2_h, core->dq_shift[c]);
    ctx->fn_itx[0](recon, ITX_SHIFT1, ITX_SHIFT2(bit_depth), 1 << log2_w);
    best_cost = (int)ctx->fn_ssd[0](width, height, org, recon, width, width);
    oapv_mcpy(out_coef, coeff, sizeof(s16) * OAPV_BLK_D);
    if(ctx->rec) {
        oapv_mcpy(out_rec, recon, sizeof(s16) * OAPV_BLK_D);
    }
    if(best_cost == 0) {
        done = 1;
    }

    for(int pass = 0; pass < num_pass && !done; pass++) {
        for(int j = 0; j < OAPV_BLK_D && !done; j++) {
            const int pos = scan[j];
            const s16 base = coeff[pos];
            int       range = full_range;
            int       best_idx = 0;

            if(base == 0) {
                // zero coefficients are only revisited for component 0 at positions 0..2
                if(c != 0 || pos >= 3) {
                    continue;
                }
                range = 3;
            }

            for(int i = 1; i < range && !done; i++) {
                // beyond +/-1, only keep walking in the direction (table parity)
                // that has already produced an improvement
                if(i > 2 && (best_idx == 0 || (best_idx % 2) != (i % 2))) {
                    continue;
                }

                const s16 cand = base + delta_tbl[i];
                coeff[pos] = cand;

                oapv_mcpy(recon, coeff, sizeof(s16) * OAPV_BLK_D);
                ctx->fn_dquant[0](recon, core->q_mat_dec[c], log2_w, log2_h, core->dq_shift[c]);
                ctx->fn_itx[0](recon, ITX_SHIFT1, ITX_SHIFT2(bit_depth), 1 << log2_w);
                const int cost = (int)ctx->fn_ssd[0](width, height, org, recon, width, width);

                if(cost >= best_cost) {
                    // no gain: restore the best value found so far for this position
                    coeff[pos] = base + delta_tbl[best_idx];
                    continue;
                }

                best_cost = cost;
                out_coef[pos] = cand;
                if(ctx->rec) {
                    oapv_mcpy(out_rec, recon, sizeof(s16) * OAPV_BLK_D);
                }
                best_idx = i;
                if(cost == 0) {
                    done = 1;
                }
            }
        }
    }

    core->dc_diff = out_coef[0] - core->prev_dc[c];
    core->prev_dc[c] = out_coef[0];

    return best_cost;
}
640 
/*
 * Validate the encoder parameters and derive the context settings that
 * depend on them: per-component QP, the block encoding function selected by
 * the preset, the picture size rounded up to whole macroblocks, and the tile
 * layout.
 *
 * Yields OAPV_OK on success, or OAPV_ERR_INVALID_ARGUMENT when the picture
 * size, QP or tile size is out of range, or when the resulting number of
 * tiles exceeds OAPV_MAX_TILES.
 */
static int enc_read_param(oapve_ctx_t *ctx, oapve_param_t *param)
{
    /* check input parameters */
    oapv_assert_rv(param->w > 0 && param->h > 0, OAPV_ERR_INVALID_ARGUMENT);
    oapv_assert_rv(param->qp >= MIN_QUANT && param->qp <= MAX_QUANT(10), OAPV_ERR_INVALID_ARGUMENT);
    /* tile sizes are used as divisors when the tile layout is computed */
    oapv_assert_rv(param->tile_w_mb > 0 && param->tile_h_mb > 0, OAPV_ERR_INVALID_ARGUMENT);

    ctx->qp_offset[Y_C] = 0;
    ctx->qp_offset[U_C] = param->qp_offset_c1;
    ctx->qp_offset[V_C] = param->qp_offset_c2;
    ctx->qp_offset[X_C] = param->qp_offset_c3;

    ctx->num_comp = get_num_comp(param->csp);

    for(int i = 0; i < ctx->num_comp; i++) {
        ctx->qp[i] = oapv_clip3(MIN_QUANT, MAX_QUANT(10), param->qp + ctx->qp_offset[i]);
    }

    /* the preset selects how much search effort is spent per block */
    if(param->preset == OAPV_PRESET_PLACEBO) {
        ctx->fn_enc_blk = enc_block_rdo_placebo;
    }
    else if(param->preset == OAPV_PRESET_SLOW) {
        ctx->fn_enc_blk = enc_block_rdo_slow;
    }
    else if(param->preset == OAPV_PRESET_MEDIUM) {
        ctx->fn_enc_blk = enc_block_rdo_medium;
    }
    else {
        ctx->fn_enc_blk = enc_block;
    }

    ctx->log2_block = OAPV_LOG2_BLK;

    /* set various value: picture size rounded up to a multiple of the macroblock size */
    ctx->w = ((param->w + (OAPV_MB_W - 1)) >> OAPV_LOG2_MB_W) << OAPV_LOG2_MB_W;
    ctx->h = ((param->h + (OAPV_MB_H - 1)) >> OAPV_LOG2_MB_H) << OAPV_LOG2_MB_H;

    int tile_w = param->tile_w_mb * OAPV_MB_W;
    int tile_h = param->tile_h_mb * OAPV_MB_H;

    /* refuse layouts with more tiles than OAPV_MAX_TILES before ctx->tile is written;
       the division form avoids overflowing the cols * rows product */
    int tile_cols = (ctx->w + (tile_w - 1)) / tile_w;
    int tile_rows = (ctx->h + (tile_h - 1)) / tile_h;
    oapv_assert_rv(tile_rows > 0 && tile_cols <= OAPV_MAX_TILES / tile_rows, OAPV_ERR_INVALID_ARGUMENT);

    enc_set_tile_info(ctx->tile, ctx->w, ctx->h, tile_w, tile_h, &ctx->num_tile_cols, &ctx->num_tile_rows, &ctx->num_tiles);

    return OAPV_OK;
}
683 
/*
 * Release the resources owned by the encoder context: worker threads and the
 * thread pool controller, the synchronization object, the per-thread cores
 * and the bitstream buffer of tile 0.
 *
 * Released pointers that stay in the context are reset to NULL so that a
 * repeated call does not free them twice.
 */
static void enc_flush(oapve_ctx_t *ctx)
{
    // Release thread pool controller and created threads
    if(ctx->cdesc.threads >= 1) {
        if(ctx->tpool) {
            // thread controller instance is present
            // terminate the created thread
            for(int i = 0; i < ctx->cdesc.threads; i++) {
                if(ctx->thread_id[i]) {
                    // valid thread instance
                    ctx->tpool->release(&ctx->thread_id[i]);
                }
            }
            // deinitialize the thread controller
            oapv_tpool_deinit(ctx->tpool);
            oapv_mfree_fast(ctx->tpool);
            ctx->tpool = NULL;
        }
    }

    oapv_tpool_sync_obj_delete(&ctx->sync_obj);
    for(int i = 0; i < ctx->cdesc.threads; i++) {
        enc_core_free(ctx->core[i]);
        ctx->core[i] = NULL;
    }

    oapv_mfree_fast(ctx->tile[0].bs_buf);
    ctx->tile[0].bs_buf = NULL; // do not leave a dangling pointer behind
}
712 
/*
 * Allocate everything the encoder needs before the first frame: one core per
 * configured thread, the synchronization object, the thread pool with its
 * threads, and the bitstream buffer of tile 0. Tile states and rate-control
 * parameters are initialized as well.
 *
 * Yields OAPV_OK on success. On any failure the context is cleaned up through
 * enc_flush() and the error code is returned.
 */
static int enc_ready(oapve_ctx_t *ctx)
{
    oapve_core_t *core = NULL;
    int           ret = OAPV_OK;
    oapv_assert(ctx->core[0] == NULL);

    for(int i = 0; i < ctx->cdesc.threads; i++) {
        core = enc_core_alloc();
        oapv_assert_gv(core != NULL, ret, OAPV_ERR_OUT_OF_MEMORY, ERR);
        ctx->core[i] = core;
    }

    // initialize the threads to NULL
    for(int i = 0; i < OAPV_MAX_THREADS; i++) {
        ctx->thread_id[i] = 0;
    }

    // get the context synchronization handle
    ctx->sync_obj = oapv_tpool_sync_obj_create();
    oapv_assert_gv(ctx->sync_obj != NULL, ret, OAPV_ERR_UNKNOWN, ERR);

    if(ctx->cdesc.threads >= 1) {
        ctx->tpool = oapv_malloc(sizeof(oapv_tpool_t));
        // the pool is initialized and dereferenced right below, so a failed
        // allocation must be caught here
        oapv_assert_gv(ctx->tpool != NULL, ret, OAPV_ERR_OUT_OF_MEMORY, ERR);
        oapv_tpool_init(ctx->tpool, ctx->cdesc.threads);
        for(int i = 0; i < ctx->cdesc.threads; i++) {
            ctx->thread_id[i] = ctx->tpool->create(ctx->tpool, i);
            oapv_assert_gv(ctx->thread_id[i] != NULL, ret, OAPV_ERR_UNKNOWN, ERR);
        }
    }

    for(int i = 0; i < OAPV_MAX_TILES; i++) {
        ctx->tile[i].stat = ENC_TILE_STAT_NOT_ENCODED;
    }
    ctx->tile[0].bs_buf = (u8 *)oapv_malloc(ctx->cdesc.max_bs_buf_size);
    oapv_assert_gv(ctx->tile[0].bs_buf, ret, OAPV_ERR_UNKNOWN, ERR);

    ctx->rc_param.alpha = OAPV_RC_ALPHA;
    ctx->rc_param.beta = OAPV_RC_BETA;

    return OAPV_OK;
ERR:

    // release whatever has been set up so far
    enc_flush(ctx);

    return ret;
}
759 
/*
 * Encode all blocks of component 'c' that belong to 'tile' and write them to
 * the bitstream 'bs'.
 *
 * The tile is walked macroblock by macroblock and, inside each macroblock,
 * block by block. Every block is fetched from 'org' (stride s_org bytes),
 * encoded through ctx->fn_enc_blk and written as DC difference plus AC
 * coefficients. When 'rec' is not NULL the reconstructed block is stored
 * there (stride s_rec bytes). The bitstream is padded with zero bits up to a
 * byte boundary afterwards.
 *
 * Yields the number of bytes between the writer position at entry and at
 * exit, or OAPV_ERR_MALFORMED_BITSTREAM when the writer is not byte aligned
 * at entry.
 */
static int enc_tile_comp(oapv_bs_t *bs, oapve_tile_t *tile, oapve_ctx_t *ctx, oapve_core_t *core, int c, int s_org, void *org, int s_rec, void *rec)
{
    u8 *bs_start = oapv_bsw_sink(bs);
    oapv_assert_rv(bsw_is_align8(bs), OAPV_ERR_MALFORMED_BITSTREAM);

    /* macroblock size of this component */
    const int mb_w = OAPV_MB_W >> ctx->comp_sft[c][0];
    const int mb_h = OAPV_MB_H >> ctx->comp_sft[c][1];

    /* tile rectangle in sample units of this component */
    const int x_begin = tile->x >> ctx->comp_sft[c][0];
    const int x_end = (tile->w >> ctx->comp_sft[c][0]) + x_begin;
    const int y_begin = tile->y >> ctx->comp_sft[c][1];
    const int y_end = (tile->h >> ctx->comp_sft[c][1]) + y_begin;

    for(int mb_y = y_begin; mb_y < y_end; mb_y += mb_h) {
        for(int mb_x = x_begin; mb_x < x_end; mb_x += mb_w) {
            for(int blk_y = mb_y; blk_y < (mb_y + mb_h); blk_y += OAPV_BLK_H) {
                for(int blk_x = mb_x; blk_x < (mb_x + mb_w); blk_x += OAPV_BLK_W) {
                    /* row offset is in bytes, column offset in 16-bit samples */
                    s16 *blk_org = (s16 *)((u8 *)org + blk_y * s_org) + blk_x;
                    ctx->fn_imgb_to_blk[c](blk_org, OAPV_BLK_W, OAPV_BLK_H, s_org, blk_x, (OAPV_BLK_W << 1), core->coef);

                    ctx->fn_enc_blk(ctx, core, OAPV_LOG2_BLK_W, OAPV_LOG2_BLK_H, c);
                    oapve_vlc_dc_coeff(ctx, core, bs, core->dc_diff, c);
                    oapve_vlc_ac_coeff(ctx, core, bs, core->coef, 0, c);
                    DUMP_COEF(core->coef, OAPV_BLK_D, blk_x, blk_y, c);

                    if(rec != NULL) {
                        s16 *blk_rec = (s16 *)((u8 *)rec + blk_y * s_rec) + blk_x;
                        ctx->fn_blk_to_imgb[c](core->coef_rec, OAPV_BLK_W, OAPV_BLK_H, (OAPV_BLK_W << 1), blk_x, s_rec, blk_rec);
                    }
                }
            }
        }
    }

    /* byte align */
    while(!bsw_is_align8(bs)) {
        oapv_bsw_write1(bs, 0);
    }

    /* de-init BSW */
    oapv_bsw_deinit(bs);

    return (int)(bs->cur - bs_start);
}
807 
/*
 * Encode one tile into its private bitstream buffer (tile->bs_buf).
 *
 * The tile size field and tile header are first written with placeholder
 * values, then every component is encoded, and finally the size field and
 * header are written again at the start of the buffer with the real sizes.
 *
 * ctx  : encoder context (input picture, optional reconstruction picture,
 *        quantization matrices and parameters are read from here)
 * core : per-thread working state; its quantization tables and DC/AC
 *        prediction state are (re)initialized for this tile
 * tile : tile to encode; bs_size, tile_size and th are updated
 *
 * Returns OAPV_OK on success or OAPV_ERR_OUT_OF_BS_BUF when the produced
 * data does not fit into the tile buffer.
 */
static int enc_tile(oapve_ctx_t *ctx, oapve_core_t *core, oapve_tile_t *tile)
{
    oapv_bs_t bs;
    oapv_bsw_init(&bs, tile->bs_buf, tile->bs_buf_max, NULL);

    int qp = 0;
    if(ctx->param->rc_type != OAPV_RC_CQP) {
        oapve_rc_get_qp(ctx, tile, ctx->qp[Y_C], &qp);
    }
    else {
        qp = ctx->qp[Y_C];
    }

    /* the final size is not known yet: write a placeholder, patched below */
    tile->tile_size = 0;
    DUMP_SAVE(0);
    oapve_vlc_tile_size(&bs, tile->tile_size);
    oapve_set_tile_header(ctx, &tile->th, core->tile_idx, qp);
    oapve_vlc_tile_header(ctx, &bs, &tile->th);

    /* build the per-component quantization / de-quantization tables */
    for(int c = 0; c < ctx->num_comp; c++) {
        core->qp[c] = tile->th.tile_qp[c];

        int qscale = oapv_quant_scale[core->qp[c] % 6];
        s32 scale_multiply_16 = (s32)(qscale << 4); // 15bit + 4bit
        int enc_idx = 0;
        for(int y = 0; y < OAPV_BLK_H; y++) {
            for(int x = 0; x < OAPV_BLK_W; x++) {
                core->q_mat_enc[c][enc_idx++] = scale_multiply_16 / ctx->fh.q_matrix[c][y][x];
            }
        }

        /* the decoder-side table is only built when a reconstruction picture
           is requested or the preset is at least OAPV_PRESET_MEDIUM */
        if(ctx->rec || ctx->param->preset >= OAPV_PRESET_MEDIUM) {
            core->dq_shift[c] = ctx->bit_depth - 2 - (core->qp[c] / 6);

            int dq_scale = oapv_tbl_dq_scale[core->qp[c] % 6];
            int dec_idx = 0;
            for(int y = 0; y < OAPV_BLK_H; y++) {
                for(int x = 0; x < OAPV_BLK_W; x++) {
                    core->q_mat_dec[c][dec_idx++] = dq_scale * ctx->fh.q_matrix[c][y][x];
                }
            }
        }
    }

    const int is_planar2 = (OAPV_CS_GET_FORMAT(ctx->imgb->cs) == OAPV_CF_PLANAR2);

    for(int c = 0; c < ctx->num_comp; c++) {
        /* reset entropy-coding prediction state for this component */
        core->prev_dc_ctx[c] = 20;
        core->prev_1st_ac_ctx[c] = 0;
        core->prev_dc[c] = 0;

        /* For OAPV_CF_PLANAR2 all components after the first share plane 1,
           and components with index > 1 start one sample further in.
           Otherwise every component has its own plane. */
        int  plane = is_planar2 ? ((c > 0) ? 1 : 0) : c;
        int  sample_ofs = (is_planar2 && c > 1) ? 1 : 0;
        int  s_org, s_rec;
        s16 *org, *rec;

        org = ctx->imgb->a[plane];
        org += sample_ofs;
        s_org = ctx->imgb->s[plane];

        if(ctx->rec) {
            rec = ctx->rec->a[plane];
            rec += sample_ofs;
            /* the stride must belong to the buffer being written: use the
               reconstruction picture's own stride, not the input picture's */
            s_rec = ctx->rec->s[plane];
        }
        else {
            rec = NULL;
            s_rec = 0;
        }

        tile->th.tile_data_size[c] = enc_tile_comp(&bs, tile, ctx, core, c, s_org, org, s_rec, rec);
    }

    u32 bs_size = (int)(bs.cur - bs.beg);
    if(bs_size > tile->bs_buf_max) {
        return OAPV_ERR_OUT_OF_BS_BUF;
    }
    tile->bs_size = bs_size;

    /* rewrite tile size and tile header now that all sizes are known */
    oapv_bs_t bs_th;
    bs_th.is_bin_count = 0;
    oapv_bsw_init(&bs_th, tile->bs_buf, tile->bs_size, NULL);
    tile->tile_size = bs_size - OAPV_TILE_SIZE_LEN;

    DUMP_SAVE(1);
    DUMP_LOAD(0);
    oapve_vlc_tile_size(&bs_th, tile->tile_size);
    oapve_vlc_tile_header(ctx, &bs_th, &tile->th);
    DUMP_LOAD(1);
    oapv_bsw_deinit(&bs_th);
    return OAPV_OK;
}
910 
enc_thread_tile(void * arg)911 static int enc_thread_tile(void *arg)
912 {
913     oapve_core_t *core = (oapve_core_t *)arg;
914     oapve_ctx_t  *ctx = core->ctx;
915     oapve_tile_t *tile = ctx->tile;
916     int           ret = OAPV_OK, i;
917 
918     while(1) {
919         // find not encoded tile
920         oapv_tpool_enter_cs(ctx->sync_obj);
921         for(i = 0; i < ctx->num_tiles; i++) {
922             if(tile[i].stat == ENC_TILE_STAT_NOT_ENCODED) {
923                 tile[i].stat = ENC_TILE_STAT_ON_ENCODING;
924                 core->tile_idx = i;
925                 break;
926             }
927         }
928         oapv_tpool_leave_cs(ctx->sync_obj);
929         if(i == ctx->num_tiles) {
930             break;
931         }
932 
933         ret = enc_tile(ctx, core, &tile[core->tile_idx]);
934         oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);
935 
936         oapv_tpool_enter_cs(ctx->sync_obj);
937         tile[core->tile_idx].stat = ENC_TILE_STAT_ENCODED;
938         oapv_tpool_leave_cs(ctx->sync_obj);
939     }
940 ERR:
941     return ret;
942 }
943 
/*
 * Pad an OAPV_CF_PLANAR2 picture from the configured size
 * (ctx->param->w x ctx->param->h) up to the coded size (ctx->w x ctx->h).
 * Every plane is padded with the same, unshifted dimensions: columns on the
 * right repeat the last valid sample of each row, rows at the bottom repeat
 * the last valid row.
 */
static void enc_img_pad_p210(oapve_ctx_t *ctx, oapv_imgb_t *imgb)
{
    const int src_w = ctx->param->w;
    const int src_h = ctx->param->h;
    const int pad_w = ctx->w;
    const int pad_h = ctx->h;

    /* horizontal padding: replicate the right-most valid sample */
    if(pad_w != src_w) {
        for(int c = 0; c < imgb->np; c++) {
            const int stride = imgb->s[c] >> 1; /* stride in pel units */
            pel      *row = (pel *)imgb->a[c];

            for(int y = 0; y < pad_h; y++, row += stride) {
                const pel edge = row[src_w - 1];
                for(int x = src_w; x < pad_w; x++) {
                    row[x] = edge;
                }
            }
        }
    }

    /* vertical padding: replicate the last valid row */
    if(pad_h != src_h) {
        for(int c = 0; c < imgb->np; c++) {
            const int stride = imgb->s[c] >> 1; /* stride in pel units */
            pel      *row = ((pel *)imgb->a[c]) + src_h * stride;
            pel      *last = row - stride;

            for(int y = src_h; y < pad_h; y++, row += stride) {
                oapv_mcpy(row, last, sizeof(pel) * pad_w);
            }
        }
    }
}
/*
 * Pad a picture with one plane per component from the configured size
 * (ctx->param->w x ctx->param->h) up to the coded size (ctx->w x ctx->h).
 * Dimensions of each plane are scaled by that component's shifts in
 * ctx->comp_sft. Columns on the right repeat the last valid sample of each
 * row, rows at the bottom repeat the last valid row.
 */
static void enc_img_pad(oapve_ctx_t *ctx, oapv_imgb_t *imgb)
{
    const int need_w = (ctx->w != ctx->param->w);
    const int need_h = (ctx->h != ctx->param->h);

    /* horizontal padding: replicate the right-most valid sample */
    if(need_w) {
        for(int c = 0; c < imgb->np; c++) {
            const int x_beg = ctx->param->w >> ctx->comp_sft[c][0];
            const int x_end = ctx->w >> ctx->comp_sft[c][0];
            const int rows = ctx->h >> ctx->comp_sft[c][1];
            const int stride = imgb->s[c] >> 1; /* stride in pel units */
            pel      *row = (pel *)imgb->a[c];

            for(int y = 0; y < rows; y++, row += stride) {
                const pel edge = row[x_beg - 1];
                for(int x = x_beg; x < x_end; x++) {
                    row[x] = edge;
                }
            }
        }
    }

    /* vertical padding: replicate the last valid row */
    if(need_h) {
        for(int c = 0; c < imgb->np; c++) {
            const int y_beg = ctx->param->h >> ctx->comp_sft[c][1];
            const int y_end = ctx->h >> ctx->comp_sft[c][1];
            const int cols = ctx->w >> ctx->comp_sft[c][0];
            const int stride = imgb->s[c] >> 1; /* stride in pel units */
            pel      *row = ((pel *)imgb->a[c]) + y_beg * stride;
            pel      *last = row - stride;

            for(int y = y_beg; y < y_end; y++, row += stride) {
                oapv_mcpy(row, last, sizeof(pel) * cols);
            }
        }
    }
}
1028 
/*
 * Set up the encoder context for encoding one frame.
 *
 * Derives chroma format, component count, component shifts and bit depth
 * from the input picture, selects the block load/store and padding routines
 * for the picture layout, pads the input picture, resets the tile states,
 * takes a reference on the input (and optional reconstruction) picture,
 * splits the shared bitstream buffer evenly between the tiles and binds
 * every core to this context.
 *
 * ctx    : encoder context
 * imgb_i : input picture
 * imgb_r : reconstruction picture, or NULL when none is requested
 *
 * Always returns OAPV_OK.
 */
static int enc_frm_prepare(oapve_ctx_t *ctx, oapv_imgb_t *imgb_i, oapv_imgb_t *imgb_r)
{
    int k;

    /* color format dependent properties */
    ctx->cfi = color_format_to_chroma_format_idc(OAPV_CS_GET_FORMAT(imgb_i->cs));
    ctx->num_comp = get_num_comp(ctx->cfi);

    ctx->comp_sft[Y_C][0] = 0;
    ctx->comp_sft[Y_C][1] = 0;
    for(k = 1; k < ctx->num_comp; k++) {
        ctx->comp_sft[k][0] = get_chroma_sft_w(ctx->cfi);
        ctx->comp_sft[k][1] = get_chroma_sft_h(ctx->cfi);
    }

    ctx->bit_depth = OAPV_CS_GET_BIT_DEPTH(imgb_i->cs);

    /* pick the picture <-> block conversion and padding routines */
    if(OAPV_CS_GET_FORMAT(imgb_i->cs) != OAPV_CF_PLANAR2) {
        ctx->fn_imgb_to_blk_rc = imgb_to_block;
        for(k = 0; k < ctx->num_comp; k++) {
            ctx->fn_imgb_to_blk[k] = imgb_to_block_10bit;
            ctx->fn_blk_to_imgb[k] = block_to_imgb_10bit;
        }
        ctx->fn_img_pad = enc_img_pad;
    }
    else {
        ctx->fn_imgb_to_blk_rc = imgb_to_block_p210;

        ctx->fn_imgb_to_blk[Y_C] = imgb_to_block_p210_y;
        ctx->fn_blk_to_imgb[Y_C] = block_to_imgb_p210_y;

        ctx->fn_imgb_to_blk[U_C] = imgb_to_block_p210_uv;
        ctx->fn_blk_to_imgb[U_C] = block_to_imgb_p210_uv;

        ctx->fn_imgb_to_blk[V_C] = imgb_to_block_p210_uv;
        ctx->fn_blk_to_imgb[V_C] = block_to_imgb_p210_uv;

        ctx->fn_img_pad = enc_img_pad_p210;
    }

    /* coded size: the aligned size when the picture provides one,
       otherwise the plain picture size */
    if(imgb_i->aw[Y_C] > 0) {
        ctx->w = imgb_i->aw[Y_C];
    }
    else {
        ctx->w = imgb_i->w[Y_C];
    }
    if(imgb_i->ah[Y_C] > 0) {
        ctx->h = imgb_i->ah[Y_C];
    }
    else {
        ctx->h = imgb_i->h[Y_C];
    }

    ctx->fn_img_pad(ctx, imgb_i);

    for(k = 0; k < ctx->num_tiles; k++) {
        ctx->tile[k].stat = ENC_TILE_STAT_NOT_ENCODED;
    }

    /* hold the pictures for the duration of the frame */
    ctx->imgb = imgb_i;
    imgb_addref(ctx->imgb);
    if(imgb_r != NULL) {
        /* the reconstruction picture mirrors the input picture's geometry */
        for(k = 0; k < ctx->num_comp; k++) {
            imgb_r->x[k] = imgb_i->x[k];
            imgb_r->y[k] = imgb_i->y[k];
            imgb_r->w[k] = imgb_i->w[k];
            imgb_r->h[k] = imgb_i->h[k];
        }
        ctx->rec = imgb_r;
        imgb_addref(ctx->rec);
    }

    /* give each tile an equal slice of the buffer that starts at tile[0].bs_buf */
    int slice_size = ctx->cdesc.max_bs_buf_size / ctx->num_tiles;
    ctx->tile[0].bs_buf_max = slice_size;
    for(k = 1; k < ctx->num_tiles; k++) {
        ctx->tile[k].bs_buf = ctx->tile[k - 1].bs_buf + slice_size;
        ctx->tile[k].bs_buf_max = slice_size;
    }

    for(k = 0; k < ctx->cdesc.threads; k++) {
        oapve_core_t *core = ctx->core[k];
        core->ctx = ctx;
        core->thread_idx = k;
    }

    return OAPV_OK;
}
1102 
/*
 * Release the picture references held by the context for the current frame.
 * The input picture is always released; the reconstruction picture is
 * released and cleared only when one is set. `stat` is not used.
 *
 * Always returns OAPV_OK.
 */
static int enc_frm_finish(oapve_ctx_t *ctx, oapve_stat_t *stat)
{
    imgb_release(ctx->imgb);

    if(ctx->rec == NULL) {
        return OAPV_OK;
    }

    imgb_release(ctx->rec);
    ctx->rec = NULL;
    return OAPV_OK;
}
1112 
/*
 * Encode the frame currently bound to `ctx` into ctx->bs.
 *
 * Steps:
 *   1. write the frame header (tile sizes are not final yet),
 *   2. when rate control is enabled, distribute the frame bit budget over
 *      the tiles and derive the frame QP,
 *   3. encode all tiles using up to ctx->cdesc.threads workers; the calling
 *      thread acts as the last worker,
 *   4. append the tile bitstreams to ctx->bs in tile order,
 *   5. rewrite the frame header when it carries the tile sizes,
 *   6. update the rate-control state.
 *
 * Every started worker is joined before this function returns, including
 * on failure, so no worker keeps touching `ctx` after an error return.
 *
 * Returns a success code, or the first error reported by a worker
 * (OAPV_ERR_FAILED_SYSCALL when joining a worker fails).
 */
static int enc_frame(oapve_ctx_t *ctx)
{
    oapv_bs_t *bs = &ctx->bs;
    int        ret = OAPV_OK;

    /* snapshot of the writer positioned at the frame header, used for the
       rewrite in step 5 */
    oapv_bs_t  bs_fh;
    oapv_mcpy(&bs_fh, bs, sizeof(oapv_bs_t));

    /* write frame header */
    oapve_set_frame_header(ctx, &ctx->fh);
    oapve_vlc_frame_header(bs, ctx, &ctx->fh);

    /* de-init BSW */
    oapv_bsw_deinit(bs);

    /* rc init */
    u64 cost_sum = 0;
    if(ctx->param->rc_type != OAPV_RC_CQP) {
        oapve_rc_get_tile_cost_thread(ctx, &cost_sum);

        /* bits available for this frame, shared between tiles in proportion
           to their estimated cost */
        double bits_pic = ((double)ctx->param->bitrate * 1000) / ((double)ctx->param->fps_num / ctx->param->fps_den);
        for(int i = 0; i < ctx->num_tiles; i++) {
            ctx->tile[i].rc.target_bits_left = bits_pic * ctx->tile[i].rc.cost / cost_sum;
            ctx->tile[i].rc.target_bits = ctx->tile[i].rc.target_bits_left;
        }

        ctx->rc_param.lambda = oapve_rc_estimate_pic_lambda(ctx, cost_sum);
        ctx->rc_param.qp = oapve_rc_estimate_pic_qp(ctx->rc_param.lambda);

        for(int c = 0; c < ctx->num_comp; c++) {
            ctx->qp[c] = oapv_clip3(MIN_QUANT, MAX_QUANT(10), ctx->rc_param.qp + ctx->qp_offset[c]);
        }
    }

    oapv_tpool_t *tpool = ctx->tpool;
    int           res, tidx = 0;
    int           parallel_task = (ctx->cdesc.threads > ctx->num_tiles) ? ctx->num_tiles : ctx->cdesc.threads;

    /* encode tiles ************************************/
    for(tidx = 0; tidx < (parallel_task - 1); tidx++) {
        tpool->run(ctx->thread_id[tidx], enc_thread_tile,
                   (void *)ctx->core[tidx]);
    }
    /* the calling thread takes the remaining core */
    ret = enc_thread_tile((void *)ctx->core[tidx]);

    /* Join every started worker before looking at any result: bailing out
       early would leave workers running on this context. Only the first
       error is kept. */
    for(int t = 0; t < parallel_task - 1; t++) {
        int thread_ret = OAPV_OK;
        res = tpool->join(ctx->thread_id[t], &thread_ret);
        if(res != TPOOL_SUCCESS) {
            if(OAPV_SUCCEEDED(ret)) {
                ret = OAPV_ERR_FAILED_SYSCALL;
            }
        }
        else if(OAPV_SUCCEEDED(ret)) {
            ret = thread_ret;
        }
    }
    oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);
    /****************************************************/

    /* concatenate the tile bitstreams behind the frame header */
    for(int i = 0; i < ctx->num_tiles; i++) {
        oapv_mcpy(ctx->bs.cur, ctx->tile[i].bs_buf, ctx->tile[i].bs_size);
        ctx->bs.cur = ctx->bs.cur + ctx->tile[i].bs_size;
        ctx->fh.tile_size[i] = ctx->tile[i].bs_size - OAPV_TILE_SIZE_LEN;
    }

    /* rewrite frame header */
    if(ctx->fh.tile_size_present_in_fh_flag) {
        oapve_vlc_frame_header(&bs_fh, ctx, &ctx->fh);
        /* flush the rewritten header through the snapshot writer */
        oapv_bsw_sink(&bs_fh);
    }
    if(ctx->param->rc_type != OAPV_RC_CQP) {
        oapve_rc_update_after_pic(ctx, cost_sum);
    }
    return ret;

ERR:
    return ret;
}
1186 
/*
 * Install the function tables used by the encoder.
 *
 * The generic C implementations are installed first. When the build enables
 * X86_SSE the tables are replaced according to the CPU capabilities
 * reported at run time; when it enables ARM_NEON the NEON tables are
 * installed unconditionally.
 *
 * Always returns OAPV_OK.
 */
static int enc_platform_init(oapve_ctx_t *ctx)
{
    /* generic implementations */
    ctx->fn_had8x8 = oapv_dc_removed_had8x8;
    ctx->fn_dquant = oapv_tbl_fn_dquant;
    ctx->fn_quant = oapv_tbl_fn_quant;
    ctx->fn_txb = oapv_tbl_fn_tx;
    ctx->fn_itx_adj = oapv_tbl_fn_itx_adj;
    ctx->fn_itx = oapv_tbl_fn_itx;
    ctx->fn_itx_part = oapv_tbl_fn_itx_part;
    ctx->fn_diff = oapv_tbl_fn_diff_16b;
    ctx->fn_ssd = oapv_tbl_fn_ssd_16b;
    ctx->fn_sad = oapv_tbl_fn_sad_16b;
#if X86_SSE
    /* bit 0 of the CPU info word reports SSE, bit 2 reports AVX2 */
    const int cpu_info = oapv_check_cpu_info_x86();
    const int has_sse = (cpu_info >> 0) & 1;
    const int has_avx2 = (cpu_info >> 2) & 1;

    if(has_avx2) {
        ctx->fn_had8x8 = oapv_dc_removed_had8x8_sse;
        ctx->fn_dquant = oapv_tbl_fn_dquant_avx;
        ctx->fn_quant = oapv_tbl_fn_quant_avx;
        ctx->fn_txb = oapv_tbl_fn_txb_avx;
        ctx->fn_itx_adj = oapv_tbl_fn_itx_adj_avx;
        ctx->fn_itx = oapv_tbl_fn_itx_avx;
        ctx->fn_itx_part = oapv_tbl_fn_itx_part_avx;
        ctx->fn_diff = oapv_tbl_fn_diff_16b_avx;
        ctx->fn_ssd = oapv_tbl_fn_ssd_16b_avx;
        ctx->fn_sad = oapv_tbl_fn_sad_16b_avx;
    }
    else if(has_sse) {
        /* only these two routines are replaced on SSE-only CPUs */
        ctx->fn_had8x8 = oapv_dc_removed_had8x8_sse;
        ctx->fn_ssd = oapv_tbl_fn_ssd_16b_sse;
    }
#elif ARM_NEON
    ctx->fn_had8x8 = oapv_dc_removed_had8x8;
    ctx->fn_quant = oapv_tbl_fn_quant_neon;
    ctx->fn_txb = oapv_tbl_fn_txb_neon;
    ctx->fn_itx = oapv_tbl_fn_itx_neon;
    ctx->fn_diff = oapv_tbl_fn_diff_16b_neon;
    ctx->fn_ssd = oapv_tbl_fn_ssd_16b_neon;
    ctx->fn_sad = oapv_tbl_fn_sad_16b_neon;
#endif
    return OAPV_OK;
}
1234 
oapve_create(oapve_cdesc_t * cdesc,int * err)1235 oapve_t oapve_create(oapve_cdesc_t *cdesc, int *err)
1236 {
1237     oapve_ctx_t *ctx;
1238     int          ret;
1239 
1240     DUMP_CREATE(1);
1241     /* memory allocation for ctx and core structure */
1242     ctx = (oapve_ctx_t *)enc_ctx_alloc();
1243     if(ctx != NULL) {
1244         oapv_mcpy(&ctx->cdesc, cdesc, sizeof(oapve_cdesc_t));
1245         ret = enc_platform_init(ctx);
1246         oapv_assert_g(ret == OAPV_OK, ERR);
1247 
1248         ret = enc_ready(ctx);
1249         oapv_assert_g(ret == OAPV_OK, ERR);
1250 
1251         /* set default value for ctx */
1252         ctx->magic = OAPVE_MAGIC_CODE;
1253         ctx->id = (oapve_t)ctx;
1254         if(err) {
1255             *err = OAPV_OK;
1256         }
1257         return (ctx->id);
1258     }
1259     else {
1260         ret = OAPV_ERR;
1261     }
1262 ERR:
1263     if(ctx) {
1264         enc_ctx_free(ctx);
1265     }
1266     if(err) {
1267         *err = ret;
1268     }
1269     return NULL;
1270 }
1271 
/*
 * Destroy an encoder instance: flush it and free its context.
 * Nothing is done when `eid` does not map to a valid encoder context.
 */
void oapve_delete(oapve_t eid)
{
    oapve_ctx_t *ctx = enc_id_to_ctx(eid);

    oapv_assert_r(ctx);

    DUMP_DELETE();

    /* release what the context owns, then the context itself */
    enc_flush(ctx);
    enc_ctx_free(ctx);
}
1283 
/*
 * Encode one access unit.
 *
 * eid   : encoder handle
 * ifrms : input frames; one frame PBU is written per entry
 * mid   : metadata container, may be NULL; each of its entries is written
 *         as a metadata PBU after the frames. It is required (non-NULL)
 *         when frame hashing is enabled and a primary or non-primary frame
 *         is encoded.
 * bitb  : output bitstream buffer
 * stat  : receives the per-frame sizes, frame info and total bytes written
 * rfrms : optional reconstruction frames, parallel to `ifrms`
 *
 * Layout written to `bitb`: a 32-bit access-unit size, the 32-bit signature
 * 'aPv1', then the PBUs. The access-unit size and each PBU size are written
 * as placeholders first and patched once the real size is known.
 *
 * The picture references taken by enc_frm_prepare() are released on every
 * exit path taken after a successful prepare, including error returns.
 *
 * Returns OAPV_OK on success or an error code.
 */
int oapve_encode(oapve_t eid, oapv_frms_t *ifrms, oapvm_t mid, oapv_bitb_t *bitb, oapve_stat_t *stat, oapv_frms_t *rfrms)
{
    oapve_ctx_t *ctx;
    oapv_frm_t  *frm;
    oapv_bs_t   *bs;
    int          i, ret;

    ctx = enc_id_to_ctx(eid);
    oapv_assert_rv(ctx != NULL && ifrms != NULL && stat != NULL, OAPV_ERR_INVALID_ARGUMENT);
    oapv_assert_rv(bitb != NULL && bitb->addr && bitb->bsize > 0, OAPV_ERR_INVALID_ARGUMENT);

    bs = &ctx->bs;

    oapv_bsw_init(bs, bitb->addr, bitb->bsize, NULL);
    oapv_mset(stat, 0, sizeof(oapve_stat_t));

    u8       *bs_pos_au_beg = oapv_bsw_sink(bs); // address syntax of au size
    u8       *bs_pos_pbu_beg;
    oapv_bs_t bs_pbu_beg;
    oapv_bsw_write(bs, 0, 32); // placeholder for au size, patched at the end

    oapv_bsw_write(bs, 0x61507631, 32); // signature ('aPv1')

    for(i = 0; i < ifrms->num_frms; i++) {
        frm = &ifrms->frm[i];

        /* set default value for encoding parameter */
        ctx->param = &ctx->cdesc.param[i];
        ret = enc_read_param(ctx, ctx->param);
        oapv_assert_rv(ret == OAPV_OK, OAPV_ERR);

        oapv_assert_rv(ctx->param->profile_idc == OAPV_PROFILE_422_10, OAPV_ERR_UNSUPPORTED);

        // prepare for encoding a frame
        ret = enc_frm_prepare(ctx, frm->imgb, (rfrms != NULL) ? rfrms->frm[i].imgb : NULL);
        oapv_assert_rv(ret == OAPV_OK, ret);

        bs_pos_pbu_beg = oapv_bsw_sink(bs);            /* store pbu pos to calculate size */
        oapv_mcpy(&bs_pbu_beg, bs, sizeof(oapv_bs_t)); /* store pbu pos of ai to re-write */

        DUMP_SAVE(0);
        oapve_vlc_pbu_size(bs, 0);
        oapve_vlc_pbu_header(bs, frm->pbu_type, frm->group_id);
        // encode a frame
        ret = enc_frame(ctx);
        if(ret != OAPV_OK) {
            /* drop the references taken by enc_frm_prepare() before leaving */
            enc_frm_finish(ctx, stat);
        }
        oapv_assert_rv(ret == OAPV_OK, ret);

        // rewrite pbu_size
        int pbu_size = ((u8 *)oapv_bsw_sink(bs)) - bs_pos_pbu_beg - 4;
        DUMP_SAVE(1);
        DUMP_LOAD(0);
        oapve_vlc_pbu_size(&bs_pbu_beg, pbu_size);
        DUMP_LOAD(1);

        stat->frm_size[i] = pbu_size + 4 /* PBU size length */;
        copy_fh_to_finfo(&ctx->fh, frm->pbu_type, frm->group_id, &stat->aui.frm_info[i]);

        // add frame hash value of reconstructed frame into metadata list
        if(ctx->use_frm_hash) {
            if(frm->pbu_type == OAPV_PBU_TYPE_PRIMARY_FRAME ||
               frm->pbu_type == OAPV_PBU_TYPE_NON_PRIMARY_FRAME) {
                if(mid == NULL) {
                    enc_frm_finish(ctx, stat);
                }
                oapv_assert_rv(mid != NULL, OAPV_ERR_INVALID_ARGUMENT);

                ret = oapv_set_md5_pld(mid, frm->group_id, ctx->rec);
                if(!OAPV_SUCCEEDED(ret)) {
                    enc_frm_finish(ctx, stat);
                }
                oapv_assert_rv(OAPV_SUCCEEDED(ret), ret);
            }
        }

        // finishing of encoding a frame
        ret = enc_frm_finish(ctx, stat);
        oapv_assert_rv(ret == OAPV_OK, ret);
    }
    stat->aui.num_frms = ifrms->num_frms;

    /* write one metadata PBU per entry of the metadata container */
    oapvm_ctx_t *md_list = mid;
    if(md_list != NULL) {
        int num_md = md_list->num;
        for(i = 0; i < num_md; i++) {
            int group_id = md_list->group_ids[i];
            bs_pos_pbu_beg = oapv_bsw_sink(bs);            /* store pbu pos to calculate size */
            oapv_mcpy(&bs_pbu_beg, bs, sizeof(oapv_bs_t)); /* store pbu pos of ai to re-write */
            DUMP_SAVE(0);

            oapve_vlc_pbu_size(bs, 0);
            oapve_vlc_pbu_header(bs, OAPV_PBU_TYPE_METADATA, group_id);
            oapve_vlc_metadata(&md_list->md_arr[i], bs);

            // rewrite pbu_size
            int pbu_size = ((u8 *)oapv_bsw_sink(bs)) - bs_pos_pbu_beg - 4;
            DUMP_SAVE(1);
            DUMP_LOAD(0);
            oapve_vlc_pbu_size(&bs_pbu_beg, pbu_size);
            DUMP_LOAD(1);
        }
    }

    /* patch the access-unit size; the 4 bytes of the size field itself are
       not counted */
    u32 au_size = (u32)((u8 *)oapv_bsw_sink(bs) - bs_pos_au_beg) - 4;
    oapv_bsw_write_direct(bs_pos_au_beg, au_size, 32); /* u(32) */

    oapv_bsw_deinit(&ctx->bs); /* de-init BSW */
    stat->write = bsw_get_write_byte(&ctx->bs);

    return OAPV_OK;
}
1386 
/*
 * Set or get a single integer configuration value of an encoder.
 *
 * eid  : encoder handle
 * cfg  : one of the OAPV_CFG_SET_* or OAPV_CFG_GET_* selectors
 * buf  : points to an int; read for SET selectors, written for GET selectors
 * size : points to the size of `buf` in bytes; must equal sizeof(int)
 *
 * Returns OAPV_OK on success, OAPV_ERR_INVALID_ARGUMENT for an invalid
 * handle, size or value, and OAPV_ERR_UNSUPPORTED for an unknown selector.
 */
int oapve_config(oapve_t eid, int cfg, void *buf, int *size)
{
    oapve_ctx_t *ctx = enc_id_to_ctx(eid);
    oapv_assert_rv(ctx, OAPV_ERR_INVALID_ARGUMENT);

    int *ival = (int *)buf; /* every supported value is a plain int */
    int  t0;

    switch(cfg) {
    /* set config **********************************************************/
    case OAPV_CFG_SET_QP:
        oapv_assert_rv(*size == sizeof(int), OAPV_ERR_INVALID_ARGUMENT);
        t0 = *ival;
        oapv_assert_rv(t0 >= MIN_QUANT && t0 <= MAX_QUANT(10),
                       OAPV_ERR_INVALID_ARGUMENT);
        ctx->param->qp = t0;
        break;

    case OAPV_CFG_SET_FPS_NUM:
        oapv_assert_rv(*size == sizeof(int), OAPV_ERR_INVALID_ARGUMENT);
        t0 = *ival;
        oapv_assert_rv(t0 > 0, OAPV_ERR_INVALID_ARGUMENT);
        ctx->param->fps_num = t0;
        break;

    case OAPV_CFG_SET_FPS_DEN:
        oapv_assert_rv(*size == sizeof(int), OAPV_ERR_INVALID_ARGUMENT);
        t0 = *ival;
        oapv_assert_rv(t0 > 0, OAPV_ERR_INVALID_ARGUMENT);
        ctx->param->fps_den = t0;
        break;

    case OAPV_CFG_SET_BPS:
        oapv_assert_rv(*size == sizeof(int), OAPV_ERR_INVALID_ARGUMENT);
        t0 = *ival;
        oapv_assert_rv(t0 > 0, OAPV_ERR_INVALID_ARGUMENT);
        ctx->param->bitrate = t0;
        break;

    case OAPV_CFG_SET_USE_FRM_HASH:
        oapv_assert_rv(*size == sizeof(int), OAPV_ERR_INVALID_ARGUMENT);
        /* any non-zero value enables frame hashing */
        ctx->use_frm_hash = (*ival) ? 1 : 0;
        break;

    /* get config *******************************************************/
    case OAPV_CFG_GET_QP:
        oapv_assert_rv(*size == sizeof(int), OAPV_ERR_INVALID_ARGUMENT);
        *ival = ctx->param->qp;
        break;

    case OAPV_CFG_GET_WIDTH:
        oapv_assert_rv(*size == sizeof(int), OAPV_ERR_INVALID_ARGUMENT);
        *ival = ctx->param->w;
        break;

    case OAPV_CFG_GET_HEIGHT:
        oapv_assert_rv(*size == sizeof(int), OAPV_ERR_INVALID_ARGUMENT);
        *ival = ctx->param->h;
        break;

    case OAPV_CFG_GET_FPS_NUM:
        oapv_assert_rv(*size == sizeof(int), OAPV_ERR_INVALID_ARGUMENT);
        *ival = ctx->param->fps_num;
        break;

    case OAPV_CFG_GET_FPS_DEN:
        oapv_assert_rv(*size == sizeof(int), OAPV_ERR_INVALID_ARGUMENT);
        *ival = ctx->param->fps_den;
        break;

    case OAPV_CFG_GET_BPS:
        oapv_assert_rv(*size == sizeof(int), OAPV_ERR_INVALID_ARGUMENT);
        *ival = ctx->param->bitrate;
        break;

    default:
        oapv_trace("unknown config value (%d)\n", cfg);
        oapv_assert_rv(0, OAPV_ERR_UNSUPPORTED);
    }

    return OAPV_OK;
}
1458 
/*
 * Fill `param` with the default encoding parameters.
 * The structure is cleared first, so every field not listed below is zero.
 *
 * Always returns OAPV_OK.
 */
int oapve_param_default(oapve_param_t *param)
{
    oapv_mset(param, 0, sizeof(oapve_param_t));

    /* profile / level / band */
    param->profile_idc = OAPV_PROFILE_422_10;
    param->level_idc = (int)((4.1 * 30.0) + 0.5);
    param->band_idc = 2;

    param->preset = OAPV_PRESET_DEFAULT;

    /* tile size in macroblocks */
    param->tile_w_mb = 16;
    param->tile_h_mb = 16;

    /* per-component QP offsets */
    param->qp_offset_c1 = 0;
    param->qp_offset_c2 = 0;
    param->qp_offset_c3 = 0;

    /* color description */
    param->color_description_present_flag = 0;
    param->color_primaries = 2;          // unspecified color primaries
    param->transfer_characteristics = 2; // unspecified transfer characteristics
    param->matrix_coefficients = 2;      // unspecified matrix coefficients
    param->full_range_flag = 0;          // limited range

    /* quantization matrix: disabled, every entry set to 16 */
    param->use_q_matrix = 0;
    for(int comp = 0; comp < OAPV_MAX_CC; comp++) {
        for(int k = 0; k < OAPV_BLK_D; k++) {
            param->q_matrix[comp][k] = 16;
        }
    }

    return OAPV_OK;
}
1491 
1492 ///////////////////////////////////////////////////////////////////////////////
1493 // end of encoder code
1494 #endif // ENABLE_ENCODER
1495 ///////////////////////////////////////////////////////////////////////////////
1496 
1497 ///////////////////////////////////////////////////////////////////////////////
1498 // start of decoder code
1499 #if ENABLE_DECODER
1500 ///////////////////////////////////////////////////////////////////////////////
dec_id_to_ctx(oapvd_t id)1501 static oapvd_ctx_t *dec_id_to_ctx(oapvd_t id)
1502 {
1503     oapvd_ctx_t *ctx;
1504     oapv_assert_rv(id, NULL);
1505     ctx = (oapvd_ctx_t *)id;
1506     oapv_assert_rv(ctx->magic == OAPVD_MAGIC_CODE, NULL);
1507     return ctx;
1508 }
1509 
dec_ctx_alloc(void)1510 static oapvd_ctx_t *dec_ctx_alloc(void)
1511 {
1512     oapvd_ctx_t *ctx;
1513 
1514     ctx = (oapvd_ctx_t *)oapv_malloc_fast(sizeof(oapvd_ctx_t));
1515 
1516     oapv_assert_rv(ctx != NULL, NULL);
1517     oapv_mset_x64a(ctx, 0, sizeof(oapvd_ctx_t));
1518 
1519     return ctx;
1520 }
1521 
/* Free a decoder context obtained from dec_ctx_alloc(). */
static void dec_ctx_free(oapvd_ctx_t *ctx)
{
    oapv_mfree_fast(ctx);
}
1526 
dec_core_alloc(void)1527 static oapvd_core_t *dec_core_alloc(void)
1528 {
1529     oapvd_core_t *core;
1530 
1531     core = (oapvd_core_t *)oapv_malloc_fast(sizeof(oapvd_core_t));
1532 
1533     oapv_assert_rv(core, NULL);
1534     oapv_mset_x64a(core, 0, sizeof(oapvd_core_t));
1535 
1536     return core;
1537 }
1538 
/* Free a decoder core obtained from dec_core_alloc(). */
static void dec_core_free(oapvd_core_t *core)
{
    oapv_mfree_fast(core);
}
1543 
/*
 * Reconstruct one block in place in core->coef.
 *
 * The DC coefficient is rebuilt from the parsed difference and the previous
 * DC of component `c`, then the block is de-quantized and inverse
 * transformed.
 *
 * log2_w, log2_h : log2 of the block width and height
 * c              : component index
 *
 * Always returns OAPV_OK.
 */
static int dec_block(oapvd_ctx_t *ctx, oapvd_core_t *core, int log2_w, int log2_h, int c)
{
    /* DC prediction: the stored DC becomes the predictor of the next block */
    core->coef[0] = core->prev_dc[c] + core->dc_diff;
    core->prev_dc[c] = core->coef[0];

    /* inverse quantization */
    ctx->fn_dquant[0](core->coef, core->q_mat[c], log2_w, log2_h, core->dq_shift[c]);

    /* inverse transform; the second-stage shift depends on the bit depth */
    ctx->fn_itx[0](core->coef, ITX_SHIFT1, ITX_SHIFT2(ctx->bit_depth), 1 << log2_w);

    return OAPV_OK;
}
1557 
/*
 * Compute position and size of every tile of a picture.
 *
 * Tiles are numbered in raster order. Tile i starts at column
 * (i % num_tile_cols) and row (i / num_tile_cols) of the tile grid; tiles
 * that would cross the right or bottom picture edge are clipped to it.
 *
 * tile           : array of at least num_tiles entries to fill
 * w_pel, h_pel   : picture size in samples
 * tile_w, tile_h : nominal tile size in samples
 *
 * Always returns OAPV_OK.
 */
static int dec_set_tile_info(oapvd_tile_t* tile, int w_pel, int h_pel, int tile_w, int tile_h, int num_tile_cols, int num_tiles)
{
    for(int idx = 0; idx < num_tiles; idx++) {
        oapvd_tile_t *t = &tile[idx];
        const int     left = (idx % (num_tile_cols)) * tile_w;
        const int     top = (idx / (num_tile_cols)) * tile_h;

        t->x = left;
        t->y = top;

        /* clip the tile to the picture boundary */
        if(left + tile_w > w_pel) {
            t->w = w_pel - left;
        }
        else {
            t->w = tile_w;
        }
        if(top + tile_h > h_pel) {
            t->h = h_pel - top;
        }
        else {
            t->h = tile_h;
        }
    }
    return OAPV_OK;
}
1572 
/*
 * Set up the decoder context for decoding one frame whose frame header has
 * already been parsed into ctx->fh.
 *
 * Takes a reference on the output picture, derives bit depth, chroma format
 * and component shifts, selects the block store routines for the picture
 * layout, computes the tile grid and resets the per-tile state.
 *
 * ctx  : decoder context
 * imgb : output picture
 *
 * Returns OAPV_OK, or OAPV_ERR_MALFORMED_BITSTREAM when the frame header
 * describes a zero-sized tile or more tile columns/rows than supported.
 * NOTE(review): the picture reference is taken before validation, so the
 * caller presumably has to release it on failure — confirm against caller.
 */
static int dec_frm_prepare(oapvd_ctx_t *ctx, oapv_imgb_t *imgb)
{
    ctx->imgb = imgb;
    imgb_addref(ctx->imgb); // increase reference count

    ctx->bit_depth = ctx->fh.fi.bit_depth;
    ctx->cfi = ctx->fh.fi.chroma_format_idc;
    ctx->num_comp = get_num_comp(ctx->cfi);
    ctx->comp_sft[Y_C][0] = 0;
    ctx->comp_sft[Y_C][1] = 0;

    /* chroma shifts follow the color format of the output picture */
    for(int c = 1; c < ctx->num_comp; c++) {
        int imgb_cfi = color_format_to_chroma_format_idc(OAPV_CS_GET_FORMAT(imgb->cs));
        ctx->comp_sft[c][0] = get_chroma_sft_w(imgb_cfi);
        ctx->comp_sft[c][1] = get_chroma_sft_h(imgb_cfi);
    }

    /* coded size is the frame size rounded up to whole macroblocks */
    ctx->w = oapv_align_value(ctx->fh.fi.frame_width, OAPV_MB_W);
    ctx->h = oapv_align_value(ctx->fh.fi.frame_height, OAPV_MB_H);

    if(OAPV_CS_GET_FORMAT(imgb->cs) == OAPV_CF_PLANAR2) {
        ctx->fn_block_to_imgb[Y_C] = block_to_imgb_p210_y;
        ctx->fn_block_to_imgb[U_C] = block_to_imgb_p210_uv;
        ctx->fn_block_to_imgb[V_C] = block_to_imgb_p210_uv;
    }
    else {
        for(int c = 0; c < ctx->num_comp; c++) {
            ctx->fn_block_to_imgb[c] = block_to_imgb_10bit;
        }
    }

    int tile_w = ctx->fh.tile_width_in_mbs * OAPV_MB_W;
    int tile_h = ctx->fh.tile_height_in_mbs * OAPV_MB_H;

    /* the tile size comes from the bitstream and is used as a divisor
       below: reject non-positive values instead of dividing by zero */
    oapv_assert_rv(tile_w > 0 && tile_h > 0, OAPV_ERR_MALFORMED_BITSTREAM);

    ctx->num_tile_cols = (ctx->w + (tile_w - 1)) / tile_w;
    ctx->num_tile_rows = (ctx->h + (tile_h - 1)) / tile_h;
    ctx->num_tiles = ctx->num_tile_cols * ctx->num_tile_rows;

    oapv_assert_rv((ctx->num_tile_cols <= OAPV_MAX_TILE_COLS) && (ctx->num_tile_rows <= OAPV_MAX_TILE_ROWS), OAPV_ERR_MALFORMED_BITSTREAM);
    dec_set_tile_info(ctx->tile, ctx->w, ctx->h, tile_w, tile_h, ctx->num_tile_cols, ctx->num_tiles);

    /* only the first tile's start is known here; the others are cleared */
    for(int i = 0; i < ctx->num_tiles; i++) {
        ctx->tile[i].bs_beg = NULL;
    }
    ctx->tile[0].bs_beg = oapv_bsr_sink(&ctx->bs);

    for(int i = 0; i < ctx->num_tiles; i++) {
        ctx->tile[i].stat = DEC_TILE_STAT_NOT_DECODED;
    }

    return OAPV_OK;
}
1624 
/*
 * Clean up the decoder context after a frame: drop the reference on the
 * output picture and clear the bitstream reader state.
 *
 * Always returns OAPV_OK.
 */
static int dec_frm_finish(oapvd_ctx_t *ctx)
{
    /* decrease the reference count taken when the frame was prepared */
    imgb_release(ctx->imgb);
    ctx->imgb = NULL;

    /* wipe the reader so no stale state is carried into the next frame */
    oapv_mset(&ctx->bs, 0, sizeof(oapv_bs_t));

    return OAPV_OK;
}
1632 
/*
 * Decode all blocks of color component 'c' that belong to 'tile'.
 *
 * tile  : tile whose x/y/w/h give the area to decode (scaled here by the
 *         component shifts stored in ctx->comp_sft[c])
 * ctx   : decoder context (component shifts, output copy functions)
 * core  : per-thread working buffers (coefficients, DC difference)
 * bs    : bitstream reader positioned at the component data of this tile
 * c     : component index
 * s_dst : stride of the destination plane, applied as a byte offset per row
 * dst   : base address of the destination plane
 *
 * Blocks are visited macroblock by macroblock, and block by block inside each
 * macroblock. The reader is byte-aligned once the component is done.
 *
 * Returns OAPV_OK, or the failing status of the coefficient parser / block
 * decoder.
 */
static int dec_tile_comp(oapvd_tile_t *tile, oapvd_ctx_t *ctx, oapvd_core_t *core, oapv_bs_t *bs, int c, int s_dst, void *dst)
{
    const int sft_w = ctx->comp_sft[c][0];
    const int sft_h = ctx->comp_sft[c][1];

    // macroblock dimensions for this component
    const int mb_w = OAPV_MB_W >> sft_w;
    const int mb_h = OAPV_MB_H >> sft_h;

    // tile area expressed in samples of this component
    const int x_beg = tile->x >> sft_w;
    const int x_end = (tile->w >> sft_w) + x_beg;
    const int y_beg = tile->y >> sft_h;
    const int y_end = (tile->h >> sft_h) + y_beg;

    int ret;

    for(int mb_y = y_beg; mb_y < y_end; mb_y += mb_h) {
        for(int mb_x = x_beg; mb_x < x_end; mb_x += mb_w) {
            for(int by = mb_y; by < (mb_y + mb_h); by += OAPV_BLK_H) {
                // start of the destination row holding the top line of this block row
                u8 *row = (u8 *)dst + by * s_dst;

                for(int bx = mb_x; bx < (mb_x + mb_w); bx += OAPV_BLK_W) {
                    // DC coefficient
                    ret = oapvd_vlc_dc_coeff(ctx, core, bs, &core->dc_diff, c);
                    oapv_assert_rv(OAPV_SUCCEEDED(ret), ret);

                    // AC coefficients
                    ret = oapvd_vlc_ac_coeff(ctx, core, bs, core->coef, c);
                    oapv_assert_rv(OAPV_SUCCEEDED(ret), ret);
                    DUMP_COEF(core->coef, OAPV_BLK_D, bx, by, c);

                    // decode the parsed block
                    ret = dec_block(ctx, core, OAPV_LOG2_BLK_W, OAPV_LOG2_BLK_H, c);
                    oapv_assert_rv(OAPV_SUCCEEDED(ret), ret);

                    // hand the decoded block over to the image buffer
                    s16 *out = (s16 *)row + bx;
                    ctx->fn_block_to_imgb[c](core->coef, OAPV_BLK_W, OAPV_BLK_H, (OAPV_BLK_W << 1), bx, s_dst, out);
                }
            }
        }
    }

    /* byte align */
    oapv_bsr_align8(bs);
    return OAPV_OK;
}
1677 
/*
 * Decode one tile.
 *
 * core : per-thread decoding state; core->ctx is the owning decoder context
 * tile : tile to decode; bs_beg and data_size must already be known
 *
 * Steps: parse the tile header, set up the per-component quantization state
 * in 'core', decode each component into its destination plane, then consume
 * the trailing dummy data of the tile.
 *
 * Returns OAPV_OK, or the failing status of the header parser / component
 * decoder.
 */
static int dec_tile(oapvd_core_t *core, oapvd_tile_t *tile)
{
    oapvd_ctx_t *ctx = core->ctx;
    oapv_bs_t    bs;
    int          ret;

    // the tile payload follows the tile size field
    oapv_bsr_init(&bs, tile->bs_beg + OAPV_TILE_SIZE_LEN, tile->data_size, NULL);
    ret = oapvd_vlc_tile_header(&bs, ctx, &tile->th);
    oapv_assert_rv(OAPV_SUCCEEDED(ret), ret);

    // per-component quantization and prediction state
    for(int c = 0; c < ctx->num_comp; c++) {
        core->qp[c] = tile->th.tile_qp[c];

        int dq_scale = oapv_tbl_dq_scale[core->qp[c] % 6];
        core->dq_shift[c] = ctx->bit_depth - 2 - (core->qp[c] / 6);

        core->prev_dc_ctx[c] = 20;
        core->prev_1st_ac_ctx[c] = 0;
        core->prev_dc[c] = 0;

        // flatten the scaled quantization matrix row by row
        for(int y = 0; y < OAPV_BLK_H; y++) {
            for(int x = 0; x < OAPV_BLK_W; x++) {
                core->q_mat[c][y * OAPV_BLK_W + x] = dq_scale * ctx->fh.q_matrix[c][y][x]; // 7bit + 8bit
            }
        }
    }

    // decode the components one after another
    for(int c = 0; c < ctx->num_comp; c++) {
        int plane = c; // index into imgb->a[] / imgb->s[]
        int offs = 0;  // sample offset inside the plane

        if(OAPV_CS_GET_FORMAT(ctx->imgb->cs) == OAPV_CF_PLANAR2) {
            // every component after the first shares plane 1;
            // components with index above 1 start one sample later
            plane = (c > 0) ? 1 : 0;
            offs = (c > 1) ? 1 : 0;
        }

        s16 *dst = (s16 *)ctx->imgb->a[plane] + offs;
        int  s_dst = ctx->imgb->s[plane];

        ret = dec_tile_comp(tile, ctx, core, &bs, c, s_dst, dst);
        oapv_assert_rv(OAPV_SUCCEEDED(ret), ret);
    }

    oapvd_vlc_tile_dummy_data(&bs);
    return OAPV_OK;
}
1726 
dec_thread_tile(void * arg)1727 static int dec_thread_tile(void *arg)
1728 {
1729     oapv_bs_t     bs;
1730     int           i, ret, run, tile_idx = 0, thread_ret = OAPV_OK;
1731 
1732     oapvd_core_t *core = (oapvd_core_t *)arg;
1733     oapvd_ctx_t  *ctx = core->ctx;
1734     oapvd_tile_t *tile = ctx->tile;
1735 
1736     while(1) {
1737         // find not decoded tile
1738         oapv_tpool_enter_cs(ctx->sync_obj);
1739         for(i = 0; i < ctx->num_tiles; i++) {
1740             if(tile[i].stat == DEC_TILE_STAT_NOT_DECODED) {
1741                 tile[i].stat = DEC_TILE_STAT_ON_DECODING;
1742                 tile_idx = i;
1743                 break;
1744             }
1745         }
1746         oapv_tpool_leave_cs(ctx->sync_obj);
1747         if(i == ctx->num_tiles) {
1748             break;
1749         }
1750 
1751         // wait until to know bistream start position
1752         run = 1;
1753         while(run) {
1754             oapv_tpool_enter_cs(ctx->sync_obj);
1755             if(tile[tile_idx].bs_beg != NULL) {
1756                 run = 0;
1757             }
1758             oapv_tpool_leave_cs(ctx->sync_obj);
1759         }
1760         /* read tile size */
1761         oapv_bsr_init(&bs, tile[tile_idx].bs_beg, OAPV_TILE_SIZE_LEN, NULL);
1762         ret = oapvd_vlc_tile_size(&bs, &tile[tile_idx].data_size);
1763         oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);
1764         oapv_assert_g(tile[tile_idx].bs_beg + OAPV_TILE_SIZE_LEN + (tile[tile_idx].data_size - 1) <= ctx->bs.end, ERR);
1765 
1766         oapv_tpool_enter_cs(ctx->sync_obj);
1767         if(tile_idx + 1 < ctx->num_tiles) {
1768             tile[tile_idx + 1].bs_beg = tile[tile_idx].bs_beg + OAPV_TILE_SIZE_LEN + tile[tile_idx].data_size;
1769         }
1770         else {
1771             ctx->tile_end = tile[tile_idx].bs_beg + OAPV_TILE_SIZE_LEN + tile[tile_idx].data_size;
1772         }
1773         oapv_tpool_leave_cs(ctx->sync_obj);
1774 
1775         ret = dec_tile(core, &tile[tile_idx]);
1776 
1777         oapv_tpool_enter_cs(ctx->sync_obj);
1778         if (OAPV_SUCCEEDED(ret)) {
1779             tile[tile_idx].stat = DEC_TILE_STAT_DECODED;
1780         }
1781         else {
1782             tile[tile_idx].stat = ret;
1783             thread_ret = ret;
1784         }
1785         tile[tile_idx].stat = OAPV_SUCCEEDED(ret) ? DEC_TILE_STAT_DECODED : ret;
1786         oapv_tpool_leave_cs(ctx->sync_obj);
1787     }
1788     return thread_ret;
1789 
1790 ERR:
1791     oapv_tpool_enter_cs(ctx->sync_obj);
1792     tile[tile_idx].stat = DEC_TILE_STAT_SIZE_ERROR;
1793     if (tile_idx + 1 < ctx->num_tiles)
1794     {
1795         tile[tile_idx + 1].bs_beg = tile[tile_idx].bs_beg;
1796     }
1797     oapv_tpool_leave_cs(ctx->sync_obj);
1798     return OAPV_ERR_MALFORMED_BITSTREAM;
1799 }
1800 
/*
 * Release the runtime resources created by dec_ready(): worker threads, the
 * thread pool, the synchronization object and the per-thread cores.
 *
 * The context itself is not freed here.
 */
static void dec_flush(oapvd_ctx_t *ctx)
{
    if(ctx->cdesc.threads >= 2) {
        if(ctx->tpool) {
            // thread controller instance is present
            // terminate the created threads
            for(int i = 0; i < ctx->cdesc.threads - 1; i++) {
                if(ctx->thread_id[i]) {
                    // valid thread instance
                    ctx->tpool->release(&ctx->thread_id[i]);
                }
            }
            // deinitialize the tpool
            oapv_tpool_deinit(ctx->tpool);
            oapv_mfree(ctx->tpool);
            ctx->tpool = NULL;
        }
    }

    oapv_tpool_sync_obj_delete(&(ctx->sync_obj));

    for(int i = 0; i < ctx->cdesc.threads; i++) {
        dec_core_free(ctx->core[i]);
        // do not keep a dangling pointer: dec_ready() decides whether cores have
        // to be allocated by testing ctx->core[0] against NULL
        ctx->core[i] = NULL;
    }
}
1826 
/*
 * Create the runtime resources needed for decoding: one core per configured
 * thread, the synchronization object and, when two or more threads are
 * configured, a thread pool with (threads - 1) worker threads.
 *
 * Returns OAPV_OK on success. On failure everything created so far is
 * released through dec_flush() and an error code is returned:
 *   OAPV_ERR_OUT_OF_MEMORY : a core or the thread pool could not be allocated
 *   OAPV_ERR_UNKNOWN       : the sync object or a worker thread could not be
 *                            created
 */
static int dec_ready(oapvd_ctx_t *ctx)
{
    int i, ret = OAPV_OK;

    if(ctx->core[0] == NULL) {
        // create cores
        for(i = 0; i < ctx->cdesc.threads; i++) {
            ctx->core[i] = dec_core_alloc();
            oapv_assert_gv(ctx->core[i], ret, OAPV_ERR_OUT_OF_MEMORY, ERR);
            ctx->core[i]->ctx = ctx;
        }
    }

    // initialize the threads to NULL
    for(i = 0; i < OAPV_MAX_THREADS; i++) {
        ctx->thread_id[i] = 0;
    }

    // get the context synchronization handle
    ctx->sync_obj = oapv_tpool_sync_obj_create();
    oapv_assert_gv(ctx->sync_obj != NULL, ret, OAPV_ERR_UNKNOWN, ERR);

    if(ctx->cdesc.threads >= 2) {
        ctx->tpool = oapv_malloc(sizeof(oapv_tpool_t));
        // the pool is dereferenced right below, so an allocation failure has to
        // be caught here
        oapv_assert_gv(ctx->tpool != NULL, ret, OAPV_ERR_OUT_OF_MEMORY, ERR);
        oapv_tpool_init(ctx->tpool, ctx->cdesc.threads - 1);
        for(i = 0; i < ctx->cdesc.threads - 1; i++) {
            ctx->thread_id[i] = ctx->tpool->create(ctx->tpool, i);
            oapv_assert_gv(ctx->thread_id[i] != NULL, ret, OAPV_ERR_UNKNOWN, ERR);
        }
    }
    return OAPV_OK;

ERR:
    dec_flush(ctx);

    return ret;
}
1864 
/*
 * Select the inverse-transform and dequantization function tables for the
 * platform the decoder was built for / is running on.
 *
 * The default tables are installed first and replaced only when a more
 * specific set applies:
 *   X86_SSE build  : the AVX tables when the CPU info word reports AVX2
 *                    (bit 2); otherwise the defaults stay in place
 *   ARM_NEON build : the NEON inverse-transform table; dequantization keeps
 *                    the default table
 *
 * Always returns OAPV_OK.
 */
static int dec_platform_init(oapvd_ctx_t *ctx)
{
    // default settings
    ctx->fn_itx = oapv_tbl_fn_itx;
    ctx->fn_dquant = oapv_tbl_fn_dquant;

#if X86_SSE
    const int cpu_info = oapv_check_cpu_info_x86();
    const int has_avx2 = (cpu_info >> 2) & 1;

    if(has_avx2) {
        ctx->fn_itx = oapv_tbl_fn_itx_avx;
        ctx->fn_dquant = oapv_tbl_fn_dquant_avx;
    }
    // a CPU reporting SSE only (bit 0) uses the default tables set above
#elif ARM_NEON
    ctx->fn_itx = oapv_tbl_fn_itx_neon;
#endif
    return OAPV_OK;
}
1892 
oapvd_create(oapvd_cdesc_t * cdesc,int * err)1893 oapvd_t oapvd_create(oapvd_cdesc_t *cdesc, int *err)
1894 {
1895     oapvd_ctx_t *ctx;
1896     int          ret;
1897 
1898     DUMP_CREATE(0);
1899     ctx = NULL;
1900 
1901     /* check if any decoder argument is correctly set */
1902     oapv_assert_gv(cdesc->threads > 0 && cdesc->threads <= OAPV_MAX_THREADS, ret, OAPV_ERR_INVALID_ARGUMENT, ERR);
1903 
1904     /* memory allocation for ctx and core structure */
1905     ctx = (oapvd_ctx_t *)dec_ctx_alloc();
1906     oapv_assert_gv(ctx != NULL, ret, OAPV_ERR_OUT_OF_MEMORY, ERR);
1907     oapv_mcpy(&ctx->cdesc, cdesc, sizeof(oapvd_cdesc_t));
1908 
1909     /* initialize platform-specific variables */
1910     ret = dec_platform_init(ctx);
1911     oapv_assert_g(ret == OAPV_OK, ERR);
1912 
1913     /* ready for decoding */
1914     ret = dec_ready(ctx);
1915     oapv_assert_g(ret == OAPV_OK, ERR);
1916 
1917     ctx->magic = OAPVD_MAGIC_CODE;
1918     ctx->id = (oapvd_t)ctx;
1919     if(err) {
1920         *err = OAPV_OK;
1921     }
1922     return (ctx->id);
1923 
1924 ERR:
1925     if(ctx) {
1926         dec_ctx_free(ctx);
1927     }
1928     if(err) {
1929         *err = ret;
1930     }
1931     return NULL;
1932 }
1933 
/*
 * Destroy a decoder instance created by oapvd_create().
 *
 * did : decoder handle; it is resolved to a context through dec_id_to_ctx()
 *       and the resulting context is validated with oapv_assert_r()
 *       (presumably a silent return for an invalid handle -- confirm against
 *       the macro definition)
 *
 * Runtime resources are released first, then the context itself.
 */
void oapvd_delete(oapvd_t did)
{
    oapvd_ctx_t *ctx = dec_id_to_ctx(did);

    oapv_assert_r(ctx);

    DUMP_DELETE();

    dec_flush(ctx);    // threads, pool, sync object, cores
    dec_ctx_free(ctx); // the context
}
1944 
/*
 * Decode one access unit.
 *
 * did   : decoder handle
 * bitb  : input bitstream; addr/ssize describe the access unit, which has to
 *         start with the 'aPv1' signature followed by a series of PBUs
 * ofrms : output frames; frm[i].imgb receives the i-th decoded frame and
 *         num_frms has to match the number of frame PBUs in the access unit
 * mid   : metadata container handed to the metadata parser
 * stat  : decoding statistics (bytes read, per-frame sizes, frame info)
 *
 * Each PBU is handled according to its type: frame PBUs are decoded tile by
 * tile (in parallel when worker threads exist), metadata PBUs are parsed into
 * 'mid', filler PBUs are consumed, any other type is skipped.
 *
 * Returns OAPV_OK on success, otherwise an error code such as
 * OAPV_ERR_INVALID_ARGUMENT, OAPV_ERR_MALFORMED_BITSTREAM,
 * OAPV_ERR_REACHED_MAX or the failure reported by a parsing/decoding step.
 */
int oapvd_decode(oapvd_t did, oapv_bitb_t *bitb, oapv_frms_t *ofrms, oapvm_t mid, oapvd_stat_t *stat)
{
    oapvd_ctx_t *ctx;
    oapv_bs_t   *bs;
    oapv_pbuh_t  pbuh;
    int          ret = OAPV_OK;
    u32          pbu_size;
    u32          cur_read_size = 0;
    int          frame_cnt = 0;

    ctx = dec_id_to_ctx(did);
    oapv_assert_rv(ctx, OAPV_ERR_INVALID_ARGUMENT);

    // read signature ('aPv1')
    oapv_assert_rv(bitb->ssize > 4, OAPV_ERR_MALFORMED_BITSTREAM);
    u32 signature = oapv_bsr_read_direct(bitb->addr, 32);
    oapv_assert_rv(signature == 0x61507631, OAPV_ERR_MALFORMED_BITSTREAM);
    cur_read_size += 4;
    stat->read += 4;

    do {
        // at least pbu_size (4 byte) and pbu_header (4 byte) have to be left
        u32 remain = bitb->ssize - cur_read_size;
        oapv_assert_gv((remain >= 8), ret, OAPV_ERR_MALFORMED_BITSTREAM, ERR);
        oapv_bsr_init(&ctx->bs, (u8 *)bitb->addr + cur_read_size, remain, NULL);
        bs = &ctx->bs;

        ret = oapvd_vlc_pbu_size(bs, &pbu_size); // read pbu_size (4 byte)
        oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);
        remain -= 4; // size of pbu_size syntax
        oapv_assert_gv(pbu_size <= remain, ret, OAPV_ERR_MALFORMED_BITSTREAM, ERR);
        // a PBU always contains its 4 byte header; a smaller size is corrupt and
        // would make (pbu_size - 4) wrap around in the filler branch below
        oapv_assert_gv(pbu_size >= 4, ret, OAPV_ERR_MALFORMED_BITSTREAM, ERR);

        ret = oapvd_vlc_pbu_header(bs, &pbuh);
        oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);

        if(pbuh.pbu_type == OAPV_PBU_TYPE_PRIMARY_FRAME ||
           pbuh.pbu_type == OAPV_PBU_TYPE_NON_PRIMARY_FRAME ||
           pbuh.pbu_type == OAPV_PBU_TYPE_PREVIEW_FRAME ||
           pbuh.pbu_type == OAPV_PBU_TYPE_DEPTH_FRAME ||
           pbuh.pbu_type == OAPV_PBU_TYPE_ALPHA_FRAME) {

            oapv_assert_gv(frame_cnt < OAPV_MAX_NUM_FRAMES, ret, OAPV_ERR_REACHED_MAX, ERR);

            ret = oapvd_vlc_frame_header(bs, &ctx->fh);
            oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);

            ret = dec_frm_prepare(ctx, ofrms->frm[frame_cnt].imgb);
            oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);

            int           res;
            int           finish_ret;
            oapv_tpool_t *tpool = ctx->tpool;
            int           parallel_task = 1;
            int           tidx = 0;

            // never start more workers than there are tiles
            parallel_task = (ctx->cdesc.threads > ctx->num_tiles) ? ctx->num_tiles : ctx->cdesc.threads;

            /* decode tiles ************************************/
            for(tidx = 0; tidx < (parallel_task - 1); tidx++) {
                tpool->run(ctx->thread_id[tidx], dec_thread_tile,
                           (void *)ctx->core[tidx]);
            }
            // the calling thread works as the last worker
            ret = dec_thread_tile((void *)ctx->core[tidx]);
            for(tidx = 0; tidx < parallel_task - 1; tidx++) {
                tpool->join(ctx->thread_id[tidx], &res);
                if(OAPV_FAILED(res)) {
                    ret = res;
                }
            }
            /****************************************************/

            /* READ FILLER HERE !!! */

            // ctx->tile_end is only published when the size of the last tile was
            // read successfully, so it must not be used after a failed decode
            if(OAPV_SUCCEEDED(ret)) {
                oapv_bsr_move(&ctx->bs, ctx->tile_end);
                stat->read += BSR_GET_READ_BYTE(&ctx->bs);
            }

            copy_fh_to_finfo(&ctx->fh, pbuh.pbu_type, pbuh.group_id, &stat->aui.frm_info[frame_cnt]);
            if(ret == OAPV_OK && ctx->use_frm_hash) {
                oapv_imgb_set_md5(ctx->imgb);
            }

            // always release the frame resources, but keep the tile decoding
            // status: it must not be overwritten by the result of the cleanup
            finish_ret = dec_frm_finish(ctx);
            oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);
            ret = finish_ret;
            oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);

            ofrms->frm[frame_cnt].pbu_type = pbuh.pbu_type;
            ofrms->frm[frame_cnt].group_id = pbuh.group_id;
            stat->frm_size[frame_cnt] = pbu_size + 4 /* PBU size length */;
            frame_cnt++;
        }
        else if(pbuh.pbu_type == OAPV_PBU_TYPE_METADATA) {
            ret = oapvd_vlc_metadata(bs, pbu_size, mid, pbuh.group_id);
            oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);

            stat->read += BSR_GET_READ_BYTE(&ctx->bs);
        }
        else if(pbuh.pbu_type == OAPV_PBU_TYPE_FILLER) {
            // filler payload is what remains of the PBU after its 4 byte header
            ret = oapvd_vlc_filler(bs, (pbu_size - 4));
            oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);
        }
        cur_read_size += pbu_size + 4;
    } while(cur_read_size < bitb->ssize);
    stat->aui.num_frms = frame_cnt;
    // the caller must have provided exactly as many frames as were decoded
    oapv_assert_gv(ofrms->num_frms == frame_cnt, ret, OAPV_ERR_MALFORMED_BITSTREAM, ERR);
    return ret;

ERR:
    return ret;
}
2050 
/*
 * Set a configuration value of the decoder.
 *
 * did  : decoder handle
 * cfg  : configuration identifier
 * buf  : value buffer; for OAPV_CFG_SET_USE_FRM_HASH it is read as an int,
 *        where any non-zero value enables the frame hash
 * size : not used by the configurations handled here
 *
 * Returns OAPV_OK on success, OAPV_ERR_INVALID_ARGUMENT for an invalid handle
 * and OAPV_ERR_UNSUPPORTED for an unknown configuration identifier.
 */
int oapvd_config(oapvd_t did, int cfg, void *buf, int *size)
{
    oapvd_ctx_t *ctx = dec_id_to_ctx(did);

    oapv_assert_rv(ctx, OAPV_ERR_INVALID_ARGUMENT);

    /* set config ************************************************************/
    if(cfg == OAPV_CFG_SET_USE_FRM_HASH) {
        const int *flag = (const int *)buf;

        ctx->use_frm_hash = (*flag != 0);
        return OAPV_OK;
    }

    // every other configuration identifier is rejected
    oapv_assert_rv(0, OAPV_ERR_UNSUPPORTED);
    return OAPV_OK;
}
2069 
/*
 * Collect frame information of an access unit without decoding it.
 *
 * au      : start address of the access unit ('aPv1' signature followed by a
 *           series of PBUs)
 * au_size : size of the access unit in bytes
 * aui     : receives the number of frames and the per-frame information
 *
 * When an access-unit-info PBU is found, its content is copied to 'aui' and
 * parsing stops. Otherwise the frame information of every frame PBU is
 * collected in order of appearance.
 *
 * Returns OAPV_OK on success, OAPV_ERR_MALFORMED_BITSTREAM for a corrupt
 * access unit, OAPV_ERR_REACHED_MAX when more than OAPV_MAX_NUM_FRAMES frames
 * are signalled, or the failure reported by a parsing step.
 */
int oapvd_info(void *au, int au_size, oapv_au_info_t *aui)
{
    int ret, frm_count = 0;
    u32 cur_read_size = 0;

    DUMP_SET(0);

    // read signature ('aPv1')
    oapv_assert_rv(au_size > 4, OAPV_ERR_MALFORMED_BITSTREAM);
    u32 signature = oapv_bsr_read_direct(au, 32);
    oapv_assert_rv(signature == 0x61507631, OAPV_ERR_MALFORMED_BITSTREAM);
    cur_read_size += 4;

    /* 'au' address contains series of PBU */
    do {
        oapv_bs_t bs;
        u32 pbu_size = 0;
        // at least pbu_size (4 byte) and pbu_header (4 byte) have to be left
        u32 remain = au_size - cur_read_size;
        oapv_assert_rv((remain >= 8), OAPV_ERR_MALFORMED_BITSTREAM);
        oapv_bsr_init(&bs, (u8 *)au + cur_read_size, remain, NULL);

        ret = oapvd_vlc_pbu_size(&bs, &pbu_size); // read pbu_size (4 byte)
        oapv_assert_rv(OAPV_SUCCEEDED(ret), ret);
        remain -= 4; // size of pbu_size syntax
        oapv_assert_rv(pbu_size <= remain, OAPV_ERR_MALFORMED_BITSTREAM);
        // a PBU always contains its 4 byte header; anything smaller is corrupt
        oapv_assert_rv(pbu_size >= 4, OAPV_ERR_MALFORMED_BITSTREAM);

        /* pbu header */
        oapv_pbuh_t pbuh;
        ret = oapvd_vlc_pbu_header(&bs, &pbuh); // read pbu_header() (4 byte)
        oapv_assert_rv(OAPV_SUCCEEDED(ret), OAPV_ERR_MALFORMED_BITSTREAM);
        if(pbuh.pbu_type == OAPV_PBU_TYPE_AU_INFO) {
            // parse access_unit_info in PBU
            oapv_aui_t ai;

            ret = oapvd_vlc_au_info(&bs, &ai);
            oapv_assert_rv(OAPV_SUCCEEDED(ret), ret);

            // never write past the frame info slots available in 'aui'
            oapv_assert_rv(ai.num_frames <= OAPV_MAX_NUM_FRAMES, OAPV_ERR_REACHED_MAX);

            aui->num_frms = ai.num_frames;
            for(int i = 0; i < ai.num_frames; i++) {
                copy_fi_to_finfo(&ai.frame_info[i], ai.pbu_type[i], ai.group_id[i], &aui->frm_info[i]);
            }
            return OAPV_OK; // found access_unit_info, no need to read more PBUs
        }
        if(pbuh.pbu_type == OAPV_PBU_TYPE_PRIMARY_FRAME ||
           pbuh.pbu_type == OAPV_PBU_TYPE_NON_PRIMARY_FRAME ||
           pbuh.pbu_type == OAPV_PBU_TYPE_PREVIEW_FRAME ||
           pbuh.pbu_type == OAPV_PBU_TYPE_DEPTH_FRAME ||
           pbuh.pbu_type == OAPV_PBU_TYPE_ALPHA_FRAME) {
            // parse frame_info in PBU
            oapv_fi_t fi;

            oapv_assert_rv(frm_count < OAPV_MAX_NUM_FRAMES, OAPV_ERR_REACHED_MAX);
            ret = oapvd_vlc_frame_info(&bs, &fi);
            oapv_assert_rv(OAPV_SUCCEEDED(ret), ret);

            copy_fi_to_finfo(&fi, pbuh.pbu_type, pbuh.group_id, &aui->frm_info[frm_count]);
            frm_count++;
        }
        aui->num_frms = frm_count;
        cur_read_size += pbu_size + 4; /* 4byte is for pbu_size syntax itself */
    } while(cur_read_size < au_size);
    DUMP_SET(1);
    return OAPV_OK;
}
2134 
2135 ///////////////////////////////////////////////////////////////////////////////
2136 // end of decoder code
2137 #endif // ENABLE_DECODER
2138 ///////////////////////////////////////////////////////////////////////////////