1 /*
2 * Copyright (c) 2022 Samsung Electronics Co., Ltd.
3 * All Rights Reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * - Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * - Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * - Neither the name of the copyright owner, nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include "oapv_def.h"
33
/*
 * Copy a w_l x h_l rectangle of component 'c' out of 'imgb' into the tightly
 * packed buffer 'block' (destination row pitch is w_l * byte-depth).
 *
 * x_l / y_l are used unchanged for component 0; for any other component they
 * are scaled down by the chroma shifts derived from the image color format.
 */
static void imgb_to_block(oapv_imgb_t *imgb, int c, int x_l, int y_l, int w_l, int h_l, s16 *block)
{
    const int bd = OAPV_CS_GET_BYTE_DEPTH(imgb->cs);
    int       sft_hor = 0;
    int       sft_ver = 0;

    if(c != 0) {
        u8 cfi = color_format_to_chroma_format_idc(OAPV_CS_GET_FORMAT(imgb->cs));
        sft_hor = get_chroma_sft_w(cfi);
        sft_ver = get_chroma_sft_h(cfi);
    }

    // number of bytes copied per row, also the pitch of the packed block
    const int row_bytes = (w_l)*bd;

    u8 *src = ((u8 *)imgb->a[c]) + ((y_l >> sft_ver) * imgb->s[c]) + ((x_l * bd) >> sft_hor);
    u8 *dst = (u8 *)block;

    for(int row = 0; row < (h_l); row++) {
        oapv_mcpy(dst, src, row_bytes);

        src += imgb->s[c];
        dst += row_bytes;
    }
}
59
imgb_to_block_10bit(void * src,int blk_w,int blk_h,int s_src,int offset_src,int s_dst,void * dst)60 static void imgb_to_block_10bit(void *src, int blk_w, int blk_h, int s_src, int offset_src, int s_dst, void *dst)
61 {
62 const int mid_val = (1 << (10 - 1));
63 s16 *s = (s16 *)src;
64 s16 *d = (s16 *)dst;
65
66 for(int h = 0; h < blk_h; h++) {
67 for(int w = 0; w < blk_w; w++) {
68 d[w] = s[w] - mid_val;
69 }
70 s = (s16 *)(((u8 *)s) + s_src);
71 d = (s16 *)(((u8 *)d) + s_dst);
72 }
73 }
74
imgb_to_block_p210_y(void * src,int blk_w,int blk_h,int s_src,int offset_src,int s_dst,void * dst)75 static void imgb_to_block_p210_y(void *src, int blk_w, int blk_h, int s_src, int offset_src, int s_dst, void *dst)
76 {
77 const int mid_val = (1 << (10 - 1));
78 u16 *s = (s16 *)src;
79 s16 *d = (s16 *)dst;
80
81 for(int h = 0; h < blk_h; h++) {
82 for(int w = 0; w < blk_w; w++) {
83 d[w] = (s16)(s[w] >> 6) - mid_val;
84 }
85 s = (u16 *)(((u8 *)s) + s_src);
86 d = (s16 *)(((u8 *)d) + s_dst);
87 }
88 }
89
imgb_to_block_p210_uv(void * src,int blk_w,int blk_h,int s_src,int offset_src,int s_dst,void * dst)90 static void imgb_to_block_p210_uv(void *src, int blk_w, int blk_h, int s_src, int offset_src, int s_dst, void *dst)
91 {
92 const int mid_val = (1 << (10 - 1));
93 u16 *s = (u16 *)src + offset_src;
94 s16 *d = (s16 *)dst;
95
96 for(int h = 0; h < blk_h; h++) {
97 for(int w = 0; w < blk_w; w++) {
98 d[w] = (s16)(s[w * 2] >> 6) - mid_val;
99 }
100 s = (u16 *)(((u8 *)s) + s_src);
101 d = (s16 *)(((u8 *)d) + s_dst);
102 }
103 }
104
/*
 * Extract a w_l x h_l rectangle of component 'c' from 'imgb' into the packed
 * buffer 'block', shifting every 16-bit sample right by 6.
 *
 * Component 0 is read from plane 0 with a sample step of 1. Any other
 * component is read from plane 1 with a sample step of 2, starting at
 * sample offset (c >> 1) within each pair.
 */
static void imgb_to_block_p210(oapv_imgb_t *imgb, int c, int x_l, int y_l, int w_l, int h_l, s16 *block)
{
    const int bd = OAPV_CS_GET_BYTE_DEPTH(imgb->cs);
    int       sft_hor = 0;
    int       sft_ver = 0;
    int       step = 1;  // distance, in samples, between two samples of this component
    int       plane = c; // index of the plane holding this component

    if(c != 0) {
        u8 cfi = color_format_to_chroma_format_idc(OAPV_CS_GET_FORMAT(imgb->cs));
        sft_hor = get_chroma_sft_w(cfi);
        sft_ver = get_chroma_sft_h(cfi);
        step = 2;
        plane = 1;
    }

    // plane stride converted from bytes to 16-bit samples when byte depth > 1
    const int  stride = imgb->s[plane] >> (bd > 1 ? 1 : 0);
    const int  comp_off = (c >> 1);
    const u16 *in = ((u16 *)imgb->a[plane]) + ((y_l >> sft_ver) * stride) + ((x_l * step) >> sft_hor);
    u16       *out = (u16 *)block;

    for(int row = 0; row < (h_l); row++) {
        for(int col = 0; col < (w_l); col++) {
            out[col] = (in[col * step + comp_off] >> 6);
        }
        in += stride;
        out += w_l;
    }
}
136
block_to_imgb_10bit(void * src,int blk_w,int blk_h,int s_src,int offset_dst,int s_dst,void * dst)137 static void block_to_imgb_10bit(void *src, int blk_w, int blk_h, int s_src, int offset_dst, int s_dst, void *dst)
138 {
139 const int max_val = (1 << 10) - 1;
140 const int mid_val = (1 << (10 - 1));
141 s16 *s = (s16 *)src;
142 u16 *d = (u16 *)dst;
143
144 for(int h = 0; h < blk_h; h++) {
145 for(int w = 0; w < blk_w; w++) {
146 d[w] = oapv_clip3(0, max_val, s[w] + mid_val);
147 }
148 s = (s16 *)(((u8 *)s) + s_src);
149 d = (u16 *)(((u8 *)d) + s_dst);
150 }
151 }
152
block_to_imgb_p210_y(void * src,int blk_w,int blk_h,int s_src,int offset_dst,int s_dst,void * dst)153 static void block_to_imgb_p210_y(void *src, int blk_w, int blk_h, int s_src, int offset_dst, int s_dst, void *dst)
154 {
155 const int max_val = (1 << 10) - 1;
156 const int mid_val = (1 << (10 - 1));
157 s16 *s = (s16 *)src;
158 u16 *d = (u16 *)dst;
159
160 for(int h = 0; h < blk_h; h++) {
161 for(int w = 0; w < blk_w; w++) {
162 d[w] = oapv_clip3(0, max_val, s[w] + mid_val) << 6;
163 }
164 s = (s16 *)(((u8 *)s) + s_src);
165 d = (u16 *)(((u8 *)d) + s_dst);
166 }
167 }
168
block_to_imgb_p210_uv(void * src,int blk_w,int blk_h,int s_src,int x_pel,int s_dst,void * dst)169 static void block_to_imgb_p210_uv(void *src, int blk_w, int blk_h, int s_src, int x_pel, int s_dst, void *dst)
170 {
171 const int max_val = (1 << 10) - 1;
172 const int mid_val = (1 << (10 - 1));
173 s16 *s = (s16 *)src;
174
175 // x_pel is x-offset value from left boundary of picture in unit of pixel.
176 // the 'dst' address has calculated by
177 // dst = (s16*)((u8*)origin + y_pel*s_dst) + x_pel;
178 // in case of P210 color format,
179 // since 's_dst' is byte size of stride including all U and V pixel values,
180 // y-offset calculation is correct.
181 // however, the adding only x_pel is not enough to address the correct pixel
182 // position of U or V because U & V use the same buffer plane
183 // in interleaved way,
184 // so, the 'dst' address should be increased by 'x_pel' to address pixel
185 // position correctly.
186 u16 *d = (u16 *)dst + x_pel; // p210 pixel value needs 0~65535 range
187
188 for(int h = 0; h < blk_h; h++) {
189 for(int w = 0; w < blk_w; w++) {
190 d[w * 2] = ((u16)oapv_clip3(0, max_val, s[w] + mid_val)) << 6;
191 }
192 s = (s16 *)(((u8 *)s) + s_src);
193 d = (u16 *)(((u8 *)d) + s_dst);
194 }
195 }
196
/*
 * Add the mid-range value (1 << (bit_depth - 1)) to each of the b_w * b_h
 * entries of 'coef', clipping every result to [0, (1 << bit_depth) - 1].
 */
static void plus_mid_val(s16 *coef, int b_w, int b_h, int bit_depth)
{
    const int mid_val = 1 << (bit_depth - 1);
    const int count = b_h * b_w;

    for(int k = 0; k < count; k++) {
        coef[k] = oapv_clip3(0, (1 << bit_depth) - 1, coef[k] + mid_val);
    }
}
204
/*
 * Fill the frame-info part of 'finfo' from the frame information 'fi',
 * together with the caller supplied PBU type and group id.
 */
static void copy_fi_to_finfo(oapv_fi_t *fi, int pbu_type, int group_id, oapv_frm_info_t *finfo)
{
    /* values supplied by the caller */
    finfo->pbu_type = pbu_type;
    finfo->group_id = group_id;

    /* picture geometry; narrowing to 'int' is considered acceptable here */
    finfo->w = (int)fi->frame_width;
    finfo->h = (int)fi->frame_height;

    /* color space built from the chroma format and bit depth */
    finfo->cs = OAPV_CS_SET(chroma_format_idc_to_color_format(fi->chroma_format_idc), fi->bit_depth, 0);

    /* fields copied one to one */
    finfo->profile_idc = fi->profile_idc;
    finfo->level_idc = fi->level_idc;
    finfo->band_idc = fi->band_idc;
    finfo->chroma_format_idc = fi->chroma_format_idc;
    finfo->bit_depth = fi->bit_depth;
    finfo->capture_time_distance = fi->capture_time_distance;
}
219
/*
 * Fill 'finfo' from the frame header 'fh': the embedded frame information
 * (via copy_fi_to_finfo), the quantization matrices flattened to one
 * dimension per component, and the color description fields.
 */
static void copy_fh_to_finfo(oapv_fh_t *fh, int pbu_type, int group_id, oapv_frm_info_t *finfo)
{
    const int col_mask = (1 << OAPV_LOG2_BLK) - 1;

    copy_fi_to_finfo(&fh->fi, pbu_type, group_id, finfo);

    /* color description */
    finfo->color_description_present_flag = fh->color_description_present_flag;
    finfo->color_primaries = fh->color_primaries;
    finfo->transfer_characteristics = fh->transfer_characteristics;
    finfo->matrix_coefficients = fh->matrix_coefficients;
    finfo->full_range_flag = fh->full_range_flag;

    /* quantization matrices: flat index k maps to [row][col] of the header matrix */
    finfo->use_q_matrix = fh->use_q_matrix;
    for(int comp = 0; comp < OAPV_MAX_CC; comp++) {
        for(int k = 0; k < OAPV_BLK_D; k++) {
            finfo->q_matrix[comp][k] = fh->q_matrix[comp][k >> OAPV_LOG2_BLK][k & col_mask];
        }
    }
}
236
237 ///////////////////////////////////////////////////////////////////////////////
238 // start of encoder code
239 #if ENABLE_ENCODER
240 ///////////////////////////////////////////////////////////////////////////////
241
enc_id_to_ctx(oapve_t id)242 static oapve_ctx_t *enc_id_to_ctx(oapve_t id)
243 {
244 oapve_ctx_t *ctx;
245 oapv_assert_rv(id, NULL);
246 ctx = (oapve_ctx_t *)id;
247 oapv_assert_rv((ctx)->magic == OAPVE_MAGIC_CODE, NULL);
248 return ctx;
249 }
250
enc_ctx_alloc(void)251 static oapve_ctx_t *enc_ctx_alloc(void)
252 {
253 oapve_ctx_t *ctx;
254 ctx = (oapve_ctx_t *)oapv_malloc_fast(sizeof(oapve_ctx_t));
255 oapv_assert_rv(ctx, NULL);
256 oapv_mset_x64a(ctx, 0, sizeof(oapve_ctx_t));
257 return ctx;
258 }
259
/*
 * Release an encoder context obtained from enc_ctx_alloc().
 * Only the context memory itself is released here.
 */
static void enc_ctx_free(oapve_ctx_t *ctx)
{
    oapv_mfree_fast(ctx);
}
264
enc_core_alloc()265 static oapve_core_t *enc_core_alloc()
266 {
267 oapve_core_t *core;
268 core = (oapve_core_t *)oapv_malloc_fast(sizeof(oapve_core_t));
269
270 oapv_assert_rv(core, NULL);
271 oapv_mset_x64a(core, 0, sizeof(oapve_core_t));
272
273 return core;
274 }
275
/*
 * Release an encoder core object obtained from enc_core_alloc().
 */
static void enc_core_free(oapve_core_t *core)
{
    oapv_mfree_fast(core);
}
280
/*
 * Bind 'core' to its owning context and record the tile index it starts with.
 * 'thread_idx' is accepted but not used. Always returns OAPV_OK.
 */
static int enc_core_init(oapve_core_t *core, oapve_ctx_t *ctx, int tile_idx, int thread_idx)
{
    core->ctx = ctx;
    core->tile_idx = tile_idx;

    return OAPV_OK;
}
287
/*
 * Subtract the mid-range value (1 << (bit_depth - 1)) from each of the
 * w_blk * h_blk entries of 'coef'.
 */
static void enc_minus_mid_val(s16 *coef, int w_blk, int h_blk, int bit_depth)
{
    const int mid_val = 1 << (bit_depth - 1);
    const int count = h_blk * w_blk;

    for(int k = 0; k < count; k++) {
        coef[k] = coef[k] - mid_val;
    }
}
295
/*
 * Split a w_pel x h_pel picture into a grid of tiles of nominal size
 * tile_w x tile_h and store position and size of each tile in 'ti', in
 * raster order. Tiles in the last column / row are cropped to the picture.
 *
 * The number of columns, rows and tiles is reported through the three output
 * pointers. Always returns OAPV_OK.
 */
static int enc_set_tile_info(oapve_tile_t *ti, int w_pel, int h_pel, int tile_w,
                             int tile_h, int *num_tile_cols, int *num_tile_rows, int *num_tiles)
{
    // round up so that a partial tile at the right / bottom edge is counted
    const int cols = (w_pel + (tile_w - 1)) / tile_w;
    const int rows = (h_pel + (tile_h - 1)) / tile_h;
    const int total = cols * rows;

    (*num_tile_cols) = cols;
    (*num_tile_rows) = rows;
    (*num_tiles) = total;

    for(int idx = 0; idx < total; idx++) {
        oapve_tile_t *t = &ti[idx];
        const int     x0 = (idx % cols) * tile_w;
        const int     y0 = (idx / cols) * tile_h;

        t->x = x0;
        t->y = y0;
        t->w = (x0 + tile_w > w_pel) ? (w_pel - x0) : tile_w;
        t->h = (y0 + tile_h > h_pel) ? (h_pel - y0) : tile_h;
    }
    return OAPV_OK;
}
313
/*
 * Plain block encoder without any search: transform and quantize core->coef
 * in place, then update the DC prediction state of component 'c'.
 *
 * When the context has a reconstruction image (ctx->rec), the quantized
 * coefficients are also dequantized and inverse transformed into
 * core->coef_rec. Always returns 0.
 */
static double enc_block(oapve_ctx_t *ctx, oapve_core_t *core, int log2_w, int log2_h, int c)
{
    const int bit_depth = ctx->bit_depth;
    s16      *coef = core->coef;

    oapv_trans(ctx, coef, log2_w, log2_h, bit_depth);
    ctx->fn_quant[0](coef, core->qp[c], core->q_mat_enc[c], log2_w, log2_h, bit_depth, c ? 128 : 212);

    // DC is coded as the difference to the previous block's DC of this component
    core->dc_diff = coef[0] - core->prev_dc[c];
    core->prev_dc[c] = coef[0];

    if(ctx->rec) {
        s16 *rec = core->coef_rec;

        oapv_mcpy(rec, coef, sizeof(s16) * OAPV_BLK_D);
        ctx->fn_dquant[0](rec, core->q_mat_dec[c], log2_w, log2_h, core->dq_shift[c]);
        ctx->fn_itx[0](rec, ITX_SHIFT1, ITX_SHIFT2(bit_depth), 1 << log2_w);
    }

    return 0;
}
332
/*
 * Block encoder with a distortion-driven coefficient refinement search
 * (selected for OAPV_PRESET_SLOW).
 *
 * core->coef is saved as the reference 'org', then transformed and quantized.
 * Starting from that quantized block, each coefficient (visited in
 * oapv_tbl_scan order) is tried with small offsets from map_idx_diff[];
 * every candidate is fully dequantized and inverse transformed, and it is
 * kept when its SSD against 'org' is lower than the best so far.
 *
 * On return core->coef holds the selected coefficients, core->coef_rec the
 * matching reconstruction (only when ctx->rec is set), and the DC prediction
 * state of component 'c' is updated. The return value is the best SSD.
 */
static double enc_block_rdo_slow(oapve_ctx_t *ctx, oapve_core_t *core, int log2_w, int log2_h, int c)
{
    ALIGNED_16(s16 recon[OAPV_BLK_D]) = { 0 };
    ALIGNED_16(s16 coeff[OAPV_BLK_D]) = { 0 };
    // 'org' goes through the same fn_ssd kernel as 'recon', so it gets the
    // same alignment as in the medium / placebo variants
    ALIGNED_16(s16 org[OAPV_BLK_D]) = { 0 };
    int        blk_w = 1 << log2_w;
    int        blk_h = 1 << log2_h;
    int        bit_depth = ctx->bit_depth;
    int        qp = core->qp[c];
    s16       *best_coeff = core->coef;
    s16       *best_recon = core->coef_rec;
    int        best_cost = INT_MAX;
    int        zero_dist = 0; // set once a candidate reaches SSD == 0; ends the search
    const u16 *scanp = oapv_tbl_scan;
    // candidate offsets, ordered so that odd indices are negative and even indices positive
    const int  map_idx_diff[15] = { 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7 };

    oapv_mcpy(org, core->coef, sizeof(s16) * OAPV_BLK_D);
    oapv_trans(ctx, core->coef, log2_w, log2_h, bit_depth);
    oapv_mcpy(coeff, core->coef, sizeof(s16) * OAPV_BLK_D);
    ctx->fn_quant[0](coeff, qp, core->q_mat_enc[c], log2_w, log2_h, bit_depth, c ? 112 : 212);

    {
        // baseline: cost of the plainly quantized block
        oapv_mcpy(recon, coeff, sizeof(s16) * OAPV_BLK_D);
        ctx->fn_dquant[0](recon, core->q_mat_dec[c], log2_w, log2_h, core->dq_shift[c]);
        ctx->fn_itx[0](recon, ITX_SHIFT1, ITX_SHIFT2(bit_depth), 1 << log2_w);
        int cost = (int)ctx->fn_ssd[0](blk_w, blk_h, org, recon, blk_w, blk_w);
        oapv_mcpy(best_coeff, coeff, sizeof(s16) * OAPV_BLK_D);
        if(ctx->rec) {
            oapv_mcpy(best_recon, recon, sizeof(s16) * OAPV_BLK_D);
        }
        if(cost == 0) {
            zero_dist = 1;
        }
        best_cost = cost;
    }

    // component 0 gets two passes over the block, other components one
    for(int itr = 0; itr < (c == 0 ? 2 : 1) && !zero_dist; itr++) {
        for(int j = 0; j < OAPV_BLK_D && !zero_dist; j++) {
            int best_idx = 0;
            s16 org_coef = coeff[scanp[j]];
            int adj_rng = c == 0 ? 13 : 5;
            if(org_coef == 0) {
                // zero coefficients are only touched for component 0 at
                // positions 0..2, and then only with offsets -1 / +1
                if(c == 0 && scanp[j] < 3) {
                    adj_rng = 3;
                }
                else {
                    continue;
                }
            }

            for(int i = 1; i < adj_rng && !zero_dist; i++) {
                if(i > 2) {
                    // beyond +-1: continue only in the direction (index
                    // parity) that has already improved the cost
                    if(best_idx == 0) {
                        continue;
                    }
                    else if(best_idx % 2 == 1 && i % 2 == 0) {
                        continue;
                    }
                    else if(best_idx % 2 == 0 && i % 2 == 1) {
                        continue;
                    }
                }

                s16 test_coef = org_coef + map_idx_diff[i];
                coeff[scanp[j]] = test_coef;

                oapv_mcpy(recon, coeff, sizeof(s16) * OAPV_BLK_D);
                ctx->fn_dquant[0](recon, core->q_mat_dec[c], log2_w, log2_h, core->dq_shift[c]);
                ctx->fn_itx[0](recon, ITX_SHIFT1, ITX_SHIFT2(bit_depth), 1 << log2_w);
                int cost = (int)ctx->fn_ssd[0](blk_w, blk_h, org, recon, blk_w, blk_w);

                if(cost < best_cost) {
                    best_cost = cost;
                    best_coeff[scanp[j]] = test_coef;
                    if(ctx->rec) {
                        oapv_mcpy(best_recon, recon, sizeof(s16) * OAPV_BLK_D);
                    }
                    best_idx = i;
                    if(cost == 0) {
                        zero_dist = 1;
                    }
                }
                else {
                    // candidate rejected: put the best value found so far back
                    coeff[scanp[j]] = org_coef + map_idx_diff[best_idx];
                }
            }
        }
    }

    core->dc_diff = best_coeff[0] - core->prev_dc[c];
    core->prev_dc[c] = best_coeff[0];

    return best_cost;
}
427
/*
 * Block encoder with a coefficient refinement search that evaluates
 * candidates incrementally (selected for OAPV_PRESET_MEDIUM).
 *
 * core->coef is saved as the reference 'org', then transformed and quantized.
 * The baseline reconstruction is computed once and an un-shifted copy of it is
 * kept in 'rec_ups'. Each candidate offset is then evaluated by adjusting
 * 'rec_ups' through ctx->fn_itx_adj[0] instead of running a full inverse
 * transform.
 *
 * On return core->coef holds the selected coefficients and the DC prediction
 * state of component 'c' is updated. When ctx->rec is set, core->coef_rec is
 * rebuilt from the selected coefficients with a full dequant + inverse
 * transform. The return value is the best SSD seen during the search.
 */
static double enc_block_rdo_medium(oapve_ctx_t *ctx, oapve_core_t *core, int log2_w, int log2_h, int c)
{
    ALIGNED_16(s16 org[OAPV_BLK_D]);
    ALIGNED_16(s16 recon[OAPV_BLK_D]);
    ALIGNED_16(s16 coeff[OAPV_BLK_D]);
    ALIGNED_16(s16 tmp_buf[OAPV_BLK_D]);

    // rec_ups: best reconstruction so far, before the final down-shift
    // rec_tmp: reconstruction of the candidate currently being tested
    ALIGNED_32(int rec_ups[OAPV_BLK_D]);
    ALIGNED_32(int rec_tmp[OAPV_BLK_D]);

    int blk_w = 1 << log2_w;
    int blk_h = 1 << log2_h;
    int bit_depth = ctx->bit_depth;
    int qp = core->qp[c];

    s16 *best_coeff = core->coef;
    s16 *best_recon = core->coef_rec;

    int        best_cost = INT_MAX;
    int        zero_dist = 0; // set once a candidate reaches SSD == 0; ends the search
    const u16 *scanp = oapv_tbl_scan;
    // candidate offsets, ordered so that odd indices are negative and even indices positive
    const int  map_idx_diff[15] = { 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7 };

    oapv_mcpy(org, core->coef, sizeof(s16) * OAPV_BLK_D);
    oapv_trans(ctx, core->coef, log2_w, log2_h, bit_depth);
    oapv_mcpy(coeff, core->coef, sizeof(s16) * OAPV_BLK_D);

    ctx->fn_quant[0](coeff, qp, core->q_mat_enc[c], log2_w, log2_h, bit_depth, c ? 112 : 212);

    {
        // baseline: cost of the plainly quantized block; this also fills
        // 'rec_ups', which the incremental evaluation below starts from
        oapv_mcpy(recon, coeff, sizeof(s16) * OAPV_BLK_D);
        ctx->fn_dquant[0](recon, core->q_mat_dec[c], log2_w, log2_h, core->dq_shift[c]);
        ctx->fn_itx_part[0](recon, tmp_buf, ITX_SHIFT1, 1 << log2_w);
        oapv_itx_get_wo_sft(tmp_buf, recon, rec_ups, ITX_SHIFT2(bit_depth), 1 << log2_h);

        int cost = (int)ctx->fn_ssd[0](blk_w, blk_h, org, recon, blk_w, blk_w);
        oapv_mcpy(best_coeff, coeff, sizeof(s16) * OAPV_BLK_D);
        if(ctx->rec) {
            oapv_mcpy(best_recon, recon, sizeof(s16) * OAPV_BLK_D);
        }
        if(cost == 0) {
            zero_dist = 1;
        }
        best_cost = cost;
    }

    // component 0 gets two passes over the block, other components one
    for(int itr = 0; itr < (c == 0 ? 2 : 1) && !zero_dist; itr++) {
        for(int j = 0; j < OAPV_BLK_D && !zero_dist; j++) {
            int best_idx = 0;
            s16 org_coef = coeff[scanp[j]];
            int adj_rng = (c == 0 ? 13 : 5);
            if(org_coef == 0) {
                // zero coefficients are only touched for component 0 at
                // positions 0..2, and then only with offsets -1 / +1
                if(c == 0 && scanp[j] < 3) {
                    adj_rng = 3;
                }
                else {
                    continue;
                }
            }
            // value added per unit of coefficient change at this position:
            // the dequant matrix entry with dq_shift applied (rounded right
            // shift when positive, left shift otherwise)
            int q_step = 0;
            if(core->dq_shift[c] > 0) {
                q_step = (core->q_mat_dec[c][scanp[j]] + (1 << (core->dq_shift[c] - 1))) >> core->dq_shift[c];
            }
            else {
                q_step = (core->q_mat_dec[c][scanp[j]]) << (-core->dq_shift[c]);
            }

            for(int i = 1; i < adj_rng && !zero_dist; i++) {
                if(i > 2) {
                    // beyond +-1: continue only in the direction (index
                    // parity) that has already improved the cost
                    if(best_idx == 0) {
                        continue;
                    }
                    else if(best_idx % 2 == 1 && i % 2 == 0) {
                        continue;
                    }
                    else if(best_idx % 2 == 0 && i % 2 == 1) {
                        continue;
                    }
                }

                s16 test_coef = org_coef + map_idx_diff[i];
                coeff[scanp[j]] = test_coef;
                // NOTE(review): step_diff is relative to org_coef, while
                // rec_ups is overwritten with the accepted candidate below;
                // confirm against fn_itx_adj whether the delta should instead
                // be relative to the last accepted offset.
                int step_diff = q_step * map_idx_diff[i];
                ctx->fn_itx_adj[0](rec_ups, rec_tmp, j, step_diff, 9);
                // rounded down-shift by 10 of the adjusted values, over a
                // fixed count of 64 entries
                for(int k = 0; k < 64; k++) {
                    recon[k] = (rec_tmp[k] + 512) >> 10;
                }

                int cost = (int)ctx->fn_ssd[0](blk_w, blk_h, org, recon, blk_w, blk_w);
                if(cost < best_cost) {
                    // accept: the candidate becomes the new starting point
                    oapv_mcpy(rec_ups, rec_tmp, sizeof(int) * OAPV_BLK_D);
                    best_cost = cost;
                    best_coeff[scanp[j]] = test_coef;
                    best_idx = i;
                    if(cost == 0) {
                        zero_dist = 1;
                    }
                }
                else {
                    // candidate rejected: put the best value found so far back
                    coeff[scanp[j]] = org_coef + map_idx_diff[best_idx];
                }
            }
        }
    }

    if(ctx->rec) {
        // best_recon was not tracked during the search; rebuild it from the
        // selected coefficients with the full dequant + inverse transform
        oapv_mcpy(best_recon, best_coeff, sizeof(s16) * OAPV_BLK_D);
        ctx->fn_dquant[0](best_recon, core->q_mat_dec[c], log2_w, log2_h, core->dq_shift[c]);
        ctx->fn_itx[0](best_recon, ITX_SHIFT1, ITX_SHIFT2(bit_depth), 1 << log2_w);
    }

    core->dc_diff = best_coeff[0] - core->prev_dc[c];
    core->prev_dc[c] = best_coeff[0];

    return best_cost;
}
544
/*
 * Block encoder with the widest coefficient refinement search (selected for
 * OAPV_PRESET_PLACEBO).
 *
 * core->coef is saved as the reference, then transformed and quantized.
 * Every coefficient (in oapv_tbl_scan order) is tried with offsets from a
 * fixed table; each candidate is fully dequantized and inverse transformed
 * and is kept when its SSD against the reference is lower than the best so
 * far. Component 0 runs up to 7 passes with offsets up to +-7, other
 * components up to 3 passes with offsets up to +-2.
 *
 * On return core->coef holds the selected coefficients, core->coef_rec the
 * matching reconstruction (only when ctx->rec is set), and the DC prediction
 * state of component 'c' is updated. The return value is the best SSD.
 */
static double enc_block_rdo_placebo(oapve_ctx_t *ctx, oapve_core_t *core, int log2_w, int log2_h, int c)
{
    // candidate offsets: odd indices are negative, even indices positive
    const int  map_idx_diff[15] = { 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7 };
    const u16 *scanp = oapv_tbl_scan;
    const int  blk_w = 1 << log2_w;
    const int  blk_h = 1 << log2_h;
    const int  bit_depth = ctx->bit_depth;
    const int  qp = core->qp[c];
    const int  num_pass = (c == 0 ? 7 : 3);
    s16       *best_coeff = core->coef;
    s16       *best_recon = core->coef_rec;
    ALIGNED_16(s16 org[OAPV_BLK_D]);
    ALIGNED_16(s16 recon[OAPV_BLK_D]);
    ALIGNED_16(s16 coeff[OAPV_BLK_D]);
    int best_cost;
    int zero_dist; // non-zero once SSD == 0 has been reached; ends the search

    /* keep the reference, then transform + quantize a working copy */
    oapv_mcpy(org, core->coef, sizeof(s16) * OAPV_BLK_D);
    oapv_trans(ctx, core->coef, log2_w, log2_h, bit_depth);
    oapv_mcpy(coeff, core->coef, sizeof(s16) * OAPV_BLK_D);

    ctx->fn_quant[0](coeff, qp, core->q_mat_enc[c], log2_w, log2_h, bit_depth, c ? 112 : 212);

    /* baseline: cost of the plainly quantized block */
    oapv_mcpy(recon, coeff, sizeof(s16) * OAPV_BLK_D);
    ctx->fn_dquant[0](recon, core->q_mat_dec[c], log2_w, log2_h, core->dq_shift[c]);
    ctx->fn_itx[0](recon, ITX_SHIFT1, ITX_SHIFT2(bit_depth), 1 << log2_w);
    best_cost = (int)ctx->fn_ssd[0](blk_w, blk_h, org, recon, blk_w, blk_w);
    oapv_mcpy(best_coeff, coeff, sizeof(s16) * OAPV_BLK_D);
    if(ctx->rec) {
        oapv_mcpy(best_recon, recon, sizeof(s16) * OAPV_BLK_D);
    }
    zero_dist = (best_cost == 0) ? 1 : 0;

    for(int pass = 0; pass < num_pass && !zero_dist; pass++) {
        for(int j = 0; j < OAPV_BLK_D && !zero_dist; j++) {
            const int pos = scanp[j];
            const s16 org_coef = coeff[pos];
            int       best_idx = 0;
            int       adj_rng = (c == 0 ? 15 : 5);

            if(org_coef == 0) {
                // zero coefficients are only touched for component 0 at
                // positions 0..2, and then only with offsets -1 / +1
                if(c != 0 || pos >= 3) {
                    continue;
                }
                adj_rng = 3;
            }

            for(int i = 1; i < adj_rng && !zero_dist; i++) {
                // beyond +-1: continue only in the direction (index parity)
                // that has already improved the cost
                if(i > 2 && (best_idx == 0 || (best_idx % 2) != (i % 2))) {
                    continue;
                }

                s16 test_coef = org_coef + map_idx_diff[i];
                coeff[pos] = test_coef;

                oapv_mcpy(recon, coeff, sizeof(s16) * OAPV_BLK_D);
                ctx->fn_dquant[0](recon, core->q_mat_dec[c], log2_w, log2_h, core->dq_shift[c]);
                ctx->fn_itx[0](recon, ITX_SHIFT1, ITX_SHIFT2(bit_depth), 1 << log2_w);
                int cost = (int)ctx->fn_ssd[0](blk_w, blk_h, org, recon, blk_w, blk_w);

                if(cost >= best_cost) {
                    // candidate rejected: put the best value found so far back
                    coeff[pos] = org_coef + map_idx_diff[best_idx];
                    continue;
                }

                best_cost = cost;
                best_coeff[pos] = test_coef;
                if(ctx->rec) {
                    oapv_mcpy(best_recon, recon, sizeof(s16) * OAPV_BLK_D);
                }
                best_idx = i;
                if(cost == 0) {
                    zero_dist = 1;
                }
            }
        }
    }

    core->dc_diff = best_coeff[0] - core->prev_dc[c];
    core->prev_dc[c] = best_coeff[0];

    return best_cost;
}
640
/*
 * Validate 'param' and derive the per-context settings from it: QP offsets
 * and clipped per-component QPs, the block encoder selected by the preset,
 * the picture size rounded up to whole macroblocks, and the tile layout.
 *
 * Returns OAPV_ERR_INVALID_ARGUMENT (through oapv_assert_rv) when the picture
 * size, QP or tile size is out of range, or when the tile layout would need
 * more than OAPV_MAX_TILES tiles; otherwise the result of
 * enc_set_tile_info().
 */
static int enc_read_param(oapve_ctx_t *ctx, oapve_param_t *param)
{
    /* check input parameters */
    oapv_assert_rv(param->w > 0 && param->h > 0, OAPV_ERR_INVALID_ARGUMENT);
    oapv_assert_rv(param->qp >= MIN_QUANT && param->qp <= MAX_QUANT(10), OAPV_ERR_INVALID_ARGUMENT);
    // the tile size is used as a divisor in enc_set_tile_info()
    oapv_assert_rv(param->tile_w_mb > 0 && param->tile_h_mb > 0, OAPV_ERR_INVALID_ARGUMENT);

    ctx->qp_offset[Y_C] = 0;
    ctx->qp_offset[U_C] = param->qp_offset_c1;
    ctx->qp_offset[V_C] = param->qp_offset_c2;
    ctx->qp_offset[X_C] = param->qp_offset_c3;

    ctx->num_comp = get_num_comp(param->csp);

    for(int i = 0; i < ctx->num_comp; i++) {
        ctx->qp[i] = oapv_clip3(MIN_QUANT, MAX_QUANT(10), param->qp + ctx->qp_offset[i]);
    }

    /* the preset selects the block encoder; anything else gets the plain one */
    if(param->preset == OAPV_PRESET_PLACEBO) {
        ctx->fn_enc_blk = enc_block_rdo_placebo;
    }
    else if(param->preset == OAPV_PRESET_SLOW) {
        ctx->fn_enc_blk = enc_block_rdo_slow;
    }
    else if(param->preset == OAPV_PRESET_MEDIUM) {
        ctx->fn_enc_blk = enc_block_rdo_medium;
    }
    else {
        ctx->fn_enc_blk = enc_block;
    }

    ctx->log2_block = OAPV_LOG2_BLK;

    /* picture size rounded up to a whole number of macroblocks */
    ctx->w = ((param->w + (OAPV_MB_W - 1)) >> OAPV_LOG2_MB_W) << OAPV_LOG2_MB_W;
    ctx->h = ((param->h + (OAPV_MB_H - 1)) >> OAPV_LOG2_MB_H) << OAPV_LOG2_MB_H;

    int tile_w = param->tile_w_mb * OAPV_MB_W;
    int tile_h = param->tile_h_mb * OAPV_MB_H;

    /* refuse layouts with more tiles than ctx->tile[] is treated as holding.
     * Assumes ctx->tile[] has OAPV_MAX_TILES entries, which is the bound the
     * tile status loop in enc_ready() uses -- confirm against oapve_ctx_t.
     * tile_rows is at least 1 here, so the division is safe. */
    int tile_cols = (ctx->w + (tile_w - 1)) / tile_w;
    int tile_rows = (ctx->h + (tile_h - 1)) / tile_h;
    oapv_assert_rv(tile_cols <= OAPV_MAX_TILES / tile_rows, OAPV_ERR_INVALID_ARGUMENT);

    return enc_set_tile_info(ctx->tile, ctx->w, ctx->h, tile_w, tile_h, &ctx->num_tile_cols, &ctx->num_tile_rows, &ctx->num_tiles);
}
683
/*
 * Release everything enc_ready() set up: worker threads and the thread pool
 * controller, the synchronization object, the per-thread core objects and the
 * bitstream buffer attached to tile 0.
 *
 * Released pointers that stay reachable through 'ctx' are reset to NULL so
 * that running this function again does not free them a second time.
 */
static void enc_flush(oapve_ctx_t *ctx)
{
    // Release thread pool controller and created threads
    if(ctx->cdesc.threads >= 1 && ctx->tpool) {
        // thread controller instance is present
        // terminate the created threads
        for(int i = 0; i < ctx->cdesc.threads; i++) {
            if(ctx->thread_id[i]) {
                // valid thread instance
                ctx->tpool->release(&ctx->thread_id[i]);
            }
        }
        // deinitialize the thread pool controller
        oapv_tpool_deinit(ctx->tpool);
        oapv_mfree_fast(ctx->tpool);
        ctx->tpool = NULL;
    }

    oapv_tpool_sync_obj_delete(&ctx->sync_obj);
    for(int i = 0; i < ctx->cdesc.threads; i++) {
        enc_core_free(ctx->core[i]);
        ctx->core[i] = NULL;
    }

    oapv_mfree_fast(ctx->tile[0].bs_buf);
    // do not leave a dangling pointer behind
    ctx->tile[0].bs_buf = NULL;
}
712
/*
 * Allocate the run-time resources of an encoder context: one core object per
 * configured thread, the synchronization object, the thread pool with its
 * worker threads, and the bitstream buffer attached to tile 0. Also resets
 * the tile states and loads the rate-control constants.
 *
 * Returns OAPV_OK on success. On any failure everything acquired so far is
 * released through enc_flush() and an error code is returned.
 */
static int enc_ready(oapve_ctx_t *ctx)
{
    oapve_core_t *core = NULL;
    int           ret = OAPV_OK;
    oapv_assert(ctx->core[0] == NULL);

    for(int i = 0; i < ctx->cdesc.threads; i++) {
        core = enc_core_alloc();
        oapv_assert_gv(core != NULL, ret, OAPV_ERR_OUT_OF_MEMORY, ERR);
        ctx->core[i] = core;
    }

    // initialize the threads to NULL
    for(int i = 0; i < OAPV_MAX_THREADS; i++) {
        ctx->thread_id[i] = 0;
    }

    // get the context synchronization handle
    ctx->sync_obj = oapv_tpool_sync_obj_create();
    oapv_assert_gv(ctx->sync_obj != NULL, ret, OAPV_ERR_UNKNOWN, ERR);

    if(ctx->cdesc.threads >= 1) {
        ctx->tpool = oapv_malloc(sizeof(oapv_tpool_t));
        // a failed allocation must not reach oapv_tpool_init() or be
        // dereferenced through ctx->tpool->create below
        oapv_assert_gv(ctx->tpool != NULL, ret, OAPV_ERR_OUT_OF_MEMORY, ERR);
        oapv_tpool_init(ctx->tpool, ctx->cdesc.threads);
        for(int i = 0; i < ctx->cdesc.threads; i++) {
            ctx->thread_id[i] = ctx->tpool->create(ctx->tpool, i);
            oapv_assert_gv(ctx->thread_id[i] != NULL, ret, OAPV_ERR_UNKNOWN, ERR);
        }
    }

    for(int i = 0; i < OAPV_MAX_TILES; i++) {
        ctx->tile[i].stat = ENC_TILE_STAT_NOT_ENCODED;
    }
    // a single buffer of the maximum bitstream size is attached to tile 0
    ctx->tile[0].bs_buf = (u8 *)oapv_malloc(ctx->cdesc.max_bs_buf_size);
    oapv_assert_gv(ctx->tile[0].bs_buf, ret, OAPV_ERR_UNKNOWN, ERR);

    ctx->rc_param.alpha = OAPV_RC_ALPHA;
    ctx->rc_param.beta = OAPV_RC_BETA;

    return OAPV_OK;
ERR:

    enc_flush(ctx);

    return ret;
}
759
/*
 * Encode every block of component 'c' inside 'tile' and append the result to
 * the bitstream writer 'bs'.
 *
 * The tile is walked macroblock by macroblock and, inside each macroblock,
 * block by block. Every block is loaded from the source plane 'org' (row
 * stride 's_org' bytes), encoded with ctx->fn_enc_blk, and its DC difference
 * and AC coefficients are written to 'bs'. When 'rec' is not NULL the
 * reconstructed block is stored there as well (row stride 's_rec' bytes).
 *
 * The writer is padded to a byte boundary and de-initialized at the end.
 * Returns the distance in bytes between the writer position at entry and
 * bs->cur at exit, or OAPV_ERR_MALFORMED_BITSTREAM (through oapv_assert_rv)
 * when the writer is not byte aligned on entry.
 */
static int enc_tile_comp(oapv_bs_t *bs, oapve_tile_t *tile, oapve_ctx_t *ctx, oapve_core_t *core, int c, int s_org, void *org, int s_rec, void *rec)
{
    u8 *bs_start = oapv_bsw_sink(bs);
    oapv_assert_rv(bsw_is_align8(bs), OAPV_ERR_MALFORMED_BITSTREAM);

    /* macroblock size and tile bounds, in samples of this component */
    const int mb_w = OAPV_MB_W >> ctx->comp_sft[c][0];
    const int mb_h = OAPV_MB_H >> ctx->comp_sft[c][1];

    const int tile_le = tile->x >> ctx->comp_sft[c][0];
    const int tile_to = tile->y >> ctx->comp_sft[c][1];
    const int tile_ri = (tile->w >> ctx->comp_sft[c][0]) + tile_le;
    const int tile_bo = (tile->h >> ctx->comp_sft[c][1]) + tile_to;

    for(int mb_y = tile_to; mb_y < tile_bo; mb_y += mb_h) {
        for(int mb_x = tile_le; mb_x < tile_ri; mb_x += mb_w) {
            const int mb_bo = mb_y + mb_h;
            const int mb_ri = mb_x + mb_w;

            for(int blk_y = mb_y; blk_y < mb_bo; blk_y += OAPV_BLK_H) {
                for(int blk_x = mb_x; blk_x < mb_ri; blk_x += OAPV_BLK_W) {
                    /* load the source block into core->coef */
                    s16 *o16 = (s16 *)((u8 *)org + blk_y * s_org) + blk_x;
                    ctx->fn_imgb_to_blk[c](o16, OAPV_BLK_W, OAPV_BLK_H, s_org, blk_x, (OAPV_BLK_W << 1), core->coef);

                    /* encode it and write DC difference + AC coefficients */
                    ctx->fn_enc_blk(ctx, core, OAPV_LOG2_BLK_W, OAPV_LOG2_BLK_H, c);
                    oapve_vlc_dc_coeff(ctx, core, bs, core->dc_diff, c);
                    oapve_vlc_ac_coeff(ctx, core, bs, core->coef, 0, c);
                    DUMP_COEF(core->coef, OAPV_BLK_D, blk_x, blk_y, c);

                    /* optionally store the reconstruction */
                    if(rec != NULL) {
                        s16 *r16 = (s16 *)((u8 *)rec + blk_y * s_rec) + blk_x;
                        ctx->fn_blk_to_imgb[c](core->coef_rec, OAPV_BLK_W, OAPV_BLK_H, (OAPV_BLK_W << 1), blk_x, s_rec, r16);
                    }
                }
            }
        }
    }

    /* byte align */
    while(!bsw_is_align8(bs)) {
        oapv_bsw_write1(bs, 0);
    }

    /* de-init BSW */
    oapv_bsw_deinit(bs);

    return (int)(bs->cur - bs_start);
}
807
/*
 * Encode one tile into tile->bs_buf.
 *
 * Steps:
 *  1. pick the tile QP (through oapve_rc_get_qp unless rate control is CQP),
 *  2. write a placeholder tile size (0) and the tile header,
 *  3. build the per-component quantization tables and, when a reconstruction
 *     or a search-based preset needs them, the dequantization tables,
 *  4. encode every component with enc_tile_comp(), recording its size in the
 *     tile header,
 *  5. rewrite tile size and tile header at the start of the buffer now that
 *     the sizes are known.
 *
 * Returns OAPV_OK, or OAPV_ERR_OUT_OF_BS_BUF when the produced size exceeds
 * tile->bs_buf_max.
 */
static int enc_tile(oapve_ctx_t *ctx, oapve_core_t *core, oapve_tile_t *tile)
{
    oapv_bs_t bs;
    oapv_bsw_init(&bs, tile->bs_buf, tile->bs_buf_max, NULL);

    int qp = 0;
    if(ctx->param->rc_type != OAPV_RC_CQP) {
        oapve_rc_get_qp(ctx, tile, ctx->qp[Y_C], &qp);
    }
    else {
        qp = ctx->qp[Y_C];
    }

    // size is unknown yet: write a placeholder, rewritten at the end
    tile->tile_size = 0;
    DUMP_SAVE(0);
    oapve_vlc_tile_size(&bs, tile->tile_size);
    oapve_set_tile_header(ctx, &tile->th, core->tile_idx, qp);
    oapve_vlc_tile_header(ctx, &bs, &tile->th);

    for(int c = 0; c < ctx->num_comp; c++) {
        int cnt = 0;
        core->qp[c] = tile->th.tile_qp[c];
        int qscale = oapv_quant_scale[core->qp[c] % 6];
        s32 scale_multiply_16 = (s32)(qscale << 4); // 15bit + 4bit
        for(int y = 0; y < OAPV_BLK_H; y++) {
            for(int x = 0; x < OAPV_BLK_W; x++) {
                core->q_mat_enc[c][cnt++] = scale_multiply_16 / ctx->fh.q_matrix[c][y][x];
            }
        }

        // dequantization tables are only needed when blocks get reconstructed
        if(ctx->rec || ctx->param->preset >= OAPV_PRESET_MEDIUM) {
            core->dq_shift[c] = ctx->bit_depth - 2 - (core->qp[c] / 6);

            int cnt_dq = 0;
            int dq_scale = oapv_tbl_dq_scale[core->qp[c] % 6];
            for(int y = 0; y < OAPV_BLK_H; y++) {
                for(int x = 0; x < OAPV_BLK_W; x++) {
                    core->q_mat_dec[c][cnt_dq++] = dq_scale * ctx->fh.q_matrix[c][y][x];
                }
            }
        }
    }

    for(int c = 0; c < ctx->num_comp; c++) {
        core->prev_dc_ctx[c] = 20;
        core->prev_1st_ac_ctx[c] = 0;
        core->prev_dc[c] = 0;

        int  tc, s_org, s_rec;
        s16 *org, *rec;

        if(OAPV_CS_GET_FORMAT(ctx->imgb->cs) == OAPV_CF_PLANAR2) {
            // two planes: component 0 in plane 0, components 1 and 2
            // interleaved in plane 1 (component 2 starts one sample later)
            tc = c > 0 ? 1 : 0;
            org = ctx->imgb->a[tc];
            org += (c > 1) ? 1 : 0;
            s_org = ctx->imgb->s[tc];

            if(ctx->rec) {
                rec = ctx->rec->a[tc];
                rec += (c > 1) ? 1 : 0;
                // the reconstruction plane is addressed with its own stride
                s_rec = ctx->rec->s[tc];
            }
            else {
                rec = NULL;
                s_rec = 0;
            }
        }
        else {
            org = ctx->imgb->a[c];
            s_org = ctx->imgb->s[c];
            if(ctx->rec) {
                rec = ctx->rec->a[c];
                // the reconstruction plane is addressed with its own stride
                s_rec = ctx->rec->s[c];
            }
            else {
                rec = NULL;
                s_rec = 0;
            }
        }

        tile->th.tile_data_size[c] = enc_tile_comp(&bs, tile, ctx, core, c, s_org, org, s_rec, rec);
    }

    u32 bs_size = (int)(bs.cur - bs.beg);
    if(bs_size > tile->bs_buf_max) {
        return OAPV_ERR_OUT_OF_BS_BUF;
    }
    tile->bs_size = bs_size;

    // second writer over the same buffer: rewrite size and header in place
    oapv_bs_t bs_th;
    bs_th.is_bin_count = 0;
    oapv_bsw_init(&bs_th, tile->bs_buf, tile->bs_size, NULL);
    tile->tile_size = bs_size - OAPV_TILE_SIZE_LEN;

    DUMP_SAVE(1);
    DUMP_LOAD(0);
    oapve_vlc_tile_size(&bs_th, tile->tile_size);
    oapve_vlc_tile_header(ctx, &bs_th, &tile->th);
    DUMP_LOAD(1);
    oapv_bsw_deinit(&bs_th);
    return OAPV_OK;
}
910
enc_thread_tile(void * arg)911 static int enc_thread_tile(void *arg)
912 {
913 oapve_core_t *core = (oapve_core_t *)arg;
914 oapve_ctx_t *ctx = core->ctx;
915 oapve_tile_t *tile = ctx->tile;
916 int ret = OAPV_OK, i;
917
918 while(1) {
919 // find not encoded tile
920 oapv_tpool_enter_cs(ctx->sync_obj);
921 for(i = 0; i < ctx->num_tiles; i++) {
922 if(tile[i].stat == ENC_TILE_STAT_NOT_ENCODED) {
923 tile[i].stat = ENC_TILE_STAT_ON_ENCODING;
924 core->tile_idx = i;
925 break;
926 }
927 }
928 oapv_tpool_leave_cs(ctx->sync_obj);
929 if(i == ctx->num_tiles) {
930 break;
931 }
932
933 ret = enc_tile(ctx, core, &tile[core->tile_idx]);
934 oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);
935
936 oapv_tpool_enter_cs(ctx->sync_obj);
937 tile[core->tile_idx].stat = ENC_TILE_STAT_ENCODED;
938 oapv_tpool_leave_cs(ctx->sync_obj);
939 }
940 ERR:
941 return ret;
942 }
943
/*
 * Pad every plane of 'imgb' from the configured picture size
 * (ctx->param->w x ctx->param->h) up to the aligned size (ctx->w x ctx->h).
 * Columns are filled with the last valid sample of each row, rows with a copy
 * of the last valid row. Each plane is processed at full width and height,
 * with no subsampling shift applied.
 *
 * NOTE(review): in a plane that interleaves two components, dst[sw - 1] is
 * the last sample of only one of them, so the column padding replicates that
 * one value into both -- confirm that this is intended.
 */
static void enc_img_pad_p210(oapve_ctx_t *ctx, oapv_imgb_t *imgb)
{
    const int pic_w = ctx->param->w;
    const int pic_h = ctx->param->h;

    /* extend each row to the right */
    if(ctx->w != pic_w) {
        for(int c = 0; c < imgb->np; c++) {
            const int stride = (imgb->s[c] >> 1); // plane stride in samples
            pel      *row = (pel *)imgb->a[c];

            for(int y = 0; y < ctx->h; y++, row += stride) {
                const pel last = row[pic_w - 1];

                for(int x = pic_w; x < ctx->w; x++) {
                    row[x] = last;
                }
            }
        }
    }

    /* extend the plane downwards */
    if(ctx->h != pic_h) {
        for(int c = 0; c < imgb->np; c++) {
            const int stride = (imgb->s[c] >> 1); // plane stride in samples
            pel      *dst = ((pel *)imgb->a[c]) + pic_h * stride;
            pel      *last_row = dst - stride;

            for(int y = pic_h; y < ctx->h; y++, dst += stride) {
                oapv_mcpy(dst, last_row, sizeof(pel) * ctx->w);
            }
        }
    }
}
/*
 * Pads a planar input picture from the source size (ctx->param->w/h) up to the
 * coded size (ctx->w/h). Each plane is scaled by its ctx->comp_sft[] shifts.
 * Columns to the right of the source are filled with the last source sample of
 * each row; rows below the source are filled with copies of the last source
 * row. Does nothing when the sizes already match.
 */
static void enc_img_pad(oapve_ctx_t *ctx, oapv_imgb_t *imgb)
{
    const int pad_cols = (ctx->w != ctx->param->w);
    const int pad_rows = (ctx->h != ctx->param->h);

    if(!pad_cols && !pad_rows) {
        return;
    }

    if(pad_cols) {
        for(int c = 0; c < imgb->np; c++) {
            const int stride = imgb->s[c] >> 1; // bytes -> samples
            const int src_w = ctx->param->w >> ctx->comp_sft[c][0];
            const int dst_w = ctx->w >> ctx->comp_sft[c][0];
            const int rows = ctx->h >> ctx->comp_sft[c][1];
            pel      *row = (pel *)imgb->a[c];

            for(int y = 0; y < rows; y++, row += stride) {
                const pel last = row[src_w - 1];
                for(int x = src_w; x < dst_w; x++) {
                    row[x] = last;
                }
            }
        }
    }

    if(pad_rows) {
        for(int c = 0; c < imgb->np; c++) {
            const int stride = imgb->s[c] >> 1;
            const int src_h = ctx->param->h >> ctx->comp_sft[c][1];
            const int dst_h = ctx->h >> ctx->comp_sft[c][1];
            const int cols = ctx->w >> ctx->comp_sft[c][0];
            pel      *row = ((pel *)imgb->a[c]) + src_h * stride; // first padded row
            pel      *last_row = row - stride;                    // last source row

            for(int y = src_h; y < dst_h; y++, row += stride) {
                oapv_mcpy(row, last_row, sizeof(pel) * cols);
            }
        }
    }
}
1028
/*
 * Per-frame encoder setup.
 *   imgb_i : input picture (a reference is taken and stored in ctx->imgb)
 *   imgb_r : optional reconstruction picture; when non-NULL it receives the
 *            input's per-component geometry and is referenced via ctx->rec
 * Derives chroma format, component shifts and bit depth from the input color
 * space, selects the block copy / padding helpers for the pixel layout, pads
 * the input to the coded size, resets tile states, slices the bitstream buffer
 * evenly among tiles and binds every core to this context.
 * Always returns OAPV_OK.
 */
static int enc_frm_prepare(oapve_ctx_t *ctx, oapv_imgb_t *imgb_i, oapv_imgb_t *imgb_r)
{
    int c, t;

    // chroma format and per-component subsampling shifts
    ctx->cfi = color_format_to_chroma_format_idc(OAPV_CS_GET_FORMAT(imgb_i->cs));
    ctx->num_comp = get_num_comp(ctx->cfi);

    ctx->comp_sft[Y_C][0] = 0;
    ctx->comp_sft[Y_C][1] = 0;
    for(c = 1; c < ctx->num_comp; c++) {
        ctx->comp_sft[c][0] = get_chroma_sft_w(ctx->cfi);
        ctx->comp_sft[c][1] = get_chroma_sft_h(ctx->cfi);
    }

    ctx->bit_depth = OAPV_CS_GET_BIT_DEPTH(imgb_i->cs);

    // helpers depend on whether chroma is stored in a shared (PLANAR2) plane
    if(OAPV_CS_GET_FORMAT(imgb_i->cs) != OAPV_CF_PLANAR2) {
        ctx->fn_imgb_to_blk_rc = imgb_to_block;
        for(c = 0; c < ctx->num_comp; c++) {
            ctx->fn_imgb_to_blk[c] = imgb_to_block_10bit;
            ctx->fn_blk_to_imgb[c] = block_to_imgb_10bit;
        }
        ctx->fn_img_pad = enc_img_pad;
    }
    else {
        ctx->fn_imgb_to_blk_rc = imgb_to_block_p210;

        ctx->fn_imgb_to_blk[Y_C] = imgb_to_block_p210_y;
        ctx->fn_imgb_to_blk[U_C] = imgb_to_block_p210_uv;
        ctx->fn_imgb_to_blk[V_C] = imgb_to_block_p210_uv;

        ctx->fn_blk_to_imgb[Y_C] = block_to_imgb_p210_y;
        ctx->fn_blk_to_imgb[U_C] = block_to_imgb_p210_uv;
        ctx->fn_blk_to_imgb[V_C] = block_to_imgb_p210_uv;

        ctx->fn_img_pad = enc_img_pad_p210;
    }

    // coded size: the aligned luma size when provided, else the plain luma size
    if(imgb_i->aw[Y_C] > 0) {
        ctx->w = imgb_i->aw[Y_C];
    }
    else {
        ctx->w = imgb_i->w[Y_C];
    }
    if(imgb_i->ah[Y_C] > 0) {
        ctx->h = imgb_i->ah[Y_C];
    }
    else {
        ctx->h = imgb_i->h[Y_C];
    }

    // extend the input up to the coded size
    ctx->fn_img_pad(ctx, imgb_i);

    for(t = 0; t < ctx->num_tiles; t++) {
        ctx->tile[t].stat = ENC_TILE_STAT_NOT_ENCODED;
    }

    // hold references to the pictures for the duration of the frame
    ctx->imgb = imgb_i;
    imgb_addref(ctx->imgb);
    if(imgb_r != NULL) {
        for(c = 0; c < ctx->num_comp; c++) {
            imgb_r->x[c] = imgb_i->x[c];
            imgb_r->y[c] = imgb_i->y[c];
            imgb_r->w[c] = imgb_i->w[c];
            imgb_r->h[c] = imgb_i->h[c];
        }
        ctx->rec = imgb_r;
        imgb_addref(ctx->rec);
    }

    // give every tile an equal slice of the bitstream buffer; tile 0 keeps
    // its existing bs_buf and the others follow it back to back
    const int slice_size = ctx->cdesc.max_bs_buf_size / ctx->num_tiles;
    for(t = 0; t < ctx->num_tiles; t++) {
        if(t > 0) {
            ctx->tile[t].bs_buf = ctx->tile[t - 1].bs_buf + slice_size;
        }
        ctx->tile[t].bs_buf_max = slice_size;
    }

    for(t = 0; t < ctx->cdesc.threads; t++) {
        ctx->core[t]->thread_idx = t;
        ctx->core[t]->ctx = ctx;
    }

    return OAPV_OK;
}
1102
/*
 * Releases the per-frame picture references held by the encoder context
 * (ctx->imgb and, when present, ctx->rec) and clears both pointers so no
 * stale pointer to a released picture stays in the context.
 * `stat` is not used. Always returns OAPV_OK.
 */
static int enc_frm_finish(oapve_ctx_t *ctx, oapve_stat_t *stat)
{
    if(ctx->imgb) {
        imgb_release(ctx->imgb);
        ctx->imgb = NULL;
    }
    if(ctx->rec) {
        imgb_release(ctx->rec);
        ctx->rec = NULL;
    }
    return OAPV_OK;
}
1112
/*
 * Encodes one frame into ctx->bs:
 *   1. writes the frame header,
 *   2. when rate control is active, distributes the picture bit budget over
 *      the tiles and derives the picture QP,
 *   3. encodes all tiles on the worker threads plus the calling thread,
 *   4. appends the tile bitstreams and, if tile sizes are signalled in the
 *      frame header, rewrites the header with the final sizes.
 * Every started worker is joined before the result is inspected, so no worker
 * is left running when an error is returned.
 * Returns the first failure seen (tile encoding or thread join), otherwise the
 * status of the tile encoding done on the calling thread.
 */
static int enc_frame(oapve_ctx_t *ctx)
{
    oapv_bs_t    *bs = &ctx->bs;
    oapv_bs_t     bs_fh;
    oapv_tpool_t *tpool;
    u64           cost_sum = 0;
    int           ret = OAPV_OK;
    int           res, tidx, parallel_task;

    // keep the writer state at the header position for the later rewrite
    oapv_mcpy(&bs_fh, bs, sizeof(oapv_bs_t));

    /* write frame header */
    oapve_set_frame_header(ctx, &ctx->fh);
    oapve_vlc_frame_header(bs, ctx, &ctx->fh);

    /* de-init BSW */
    oapv_bsw_deinit(bs);

    /* rc init */
    if(ctx->param->rc_type != OAPV_RC_CQP) {
        oapve_rc_get_tile_cost_thread(ctx, &cost_sum);

        // bit budget of one picture, shared among tiles in proportion to their cost
        double bits_pic = ((double)ctx->param->bitrate * 1000) / ((double)ctx->param->fps_num / ctx->param->fps_den);
        for(int i = 0; i < ctx->num_tiles; i++) {
            ctx->tile[i].rc.target_bits_left = bits_pic * ctx->tile[i].rc.cost / cost_sum;
            ctx->tile[i].rc.target_bits = ctx->tile[i].rc.target_bits_left;
        }

        ctx->rc_param.lambda = oapve_rc_estimate_pic_lambda(ctx, cost_sum);
        ctx->rc_param.qp = oapve_rc_estimate_pic_qp(ctx->rc_param.lambda);

        for(int c = 0; c < ctx->num_comp; c++) {
            ctx->qp[c] = oapv_clip3(MIN_QUANT, MAX_QUANT(10), ctx->rc_param.qp + ctx->qp_offset[c]);
        }
    }

    tpool = ctx->tpool;
    parallel_task = (ctx->cdesc.threads > ctx->num_tiles) ? ctx->num_tiles : ctx->cdesc.threads;

    /* encode tiles ************************************/
    for(tidx = 0; tidx < (parallel_task - 1); tidx++) {
        tpool->run(ctx->thread_id[tidx], enc_thread_tile,
                   (void *)ctx->core[tidx]);
    }
    // the calling thread works with the last core
    ret = enc_thread_tile((void *)ctx->core[tidx]);

    // Join every worker even after a failure: bailing out early would leave
    // workers running on this context. Only the first error is kept.
    for(tidx = 0; tidx < (parallel_task - 1); tidx++) {
        int thread_ret = OAPV_OK;

        res = tpool->join(ctx->thread_id[tidx], &thread_ret);
        if(OAPV_SUCCEEDED(ret)) {
            if(res != TPOOL_SUCCESS) {
                ret = OAPV_ERR_FAILED_SYSCALL;
            }
            else if(OAPV_FAILED(thread_ret)) {
                ret = thread_ret;
            }
        }
    }
    oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);
    /****************************************************/

    // concatenate the tile bitstreams right after the frame header
    for(int i = 0; i < ctx->num_tiles; i++) {
        oapv_mcpy(ctx->bs.cur, ctx->tile[i].bs_buf, ctx->tile[i].bs_size);
        ctx->bs.cur = ctx->bs.cur + ctx->tile[i].bs_size;
        ctx->fh.tile_size[i] = ctx->tile[i].bs_size - OAPV_TILE_SIZE_LEN;
    }

    /* rewrite frame header */
    if(ctx->fh.tile_size_present_in_fh_flag) {
        oapve_vlc_frame_header(&bs_fh, ctx, &ctx->fh);
        /* de-init BSW */
        oapv_bsw_sink(&bs_fh);
    }
    // same condition as the rate-control setup above, so the rate-control
    // state is updated exactly when it was initialised for this picture
    if(ctx->param->rc_type != OAPV_RC_CQP) {
        oapve_rc_update_after_pic(ctx, cost_sum);
    }

ERR:
    return ret;
}
1186
/*
 * Installs the encoder's kernel function tables. The portable C kernels are
 * installed first; with X86_SSE the CPU feature word returned by
 * oapv_check_cpu_info_x86() (bit 0: SSE, bit 2: AVX2) selects faster
 * replacements, and with ARM_NEON the NEON kernels are installed.
 * Always returns OAPV_OK.
 */
static int enc_platform_init(oapve_ctx_t *ctx)
{
    // portable kernels
    ctx->fn_sad = oapv_tbl_fn_sad_16b;
    ctx->fn_ssd = oapv_tbl_fn_ssd_16b;
    ctx->fn_diff = oapv_tbl_fn_diff_16b;
    ctx->fn_txb = oapv_tbl_fn_tx;
    ctx->fn_itx = oapv_tbl_fn_itx;
    ctx->fn_itx_part = oapv_tbl_fn_itx_part;
    ctx->fn_itx_adj = oapv_tbl_fn_itx_adj;
    ctx->fn_quant = oapv_tbl_fn_quant;
    ctx->fn_dquant = oapv_tbl_fn_dquant;
    ctx->fn_had8x8 = oapv_dc_removed_had8x8;
#if X86_SSE
    const int cpu_info = oapv_check_cpu_info_x86();
    const int has_sse = cpu_info & 1;
    const int has_avx2 = (cpu_info >> 2) & 1;

    // the SSE Hadamard kernel is used on both SSE-only and AVX2 CPUs
    if(has_avx2 || has_sse) {
        ctx->fn_had8x8 = oapv_dc_removed_had8x8_sse;
    }

    if(has_avx2) {
        ctx->fn_sad = oapv_tbl_fn_sad_16b_avx;
        ctx->fn_ssd = oapv_tbl_fn_ssd_16b_avx;
        ctx->fn_diff = oapv_tbl_fn_diff_16b_avx;
        ctx->fn_txb = oapv_tbl_fn_txb_avx;
        ctx->fn_itx = oapv_tbl_fn_itx_avx;
        ctx->fn_itx_part = oapv_tbl_fn_itx_part_avx;
        ctx->fn_itx_adj = oapv_tbl_fn_itx_adj_avx;
        ctx->fn_quant = oapv_tbl_fn_quant_avx;
        ctx->fn_dquant = oapv_tbl_fn_dquant_avx;
    }
    else if(has_sse) {
        // SSE-only CPUs additionally get the SSE SSD kernel
        ctx->fn_ssd = oapv_tbl_fn_ssd_16b_sse;
    }
#elif ARM_NEON
    ctx->fn_sad = oapv_tbl_fn_sad_16b_neon;
    ctx->fn_ssd = oapv_tbl_fn_ssd_16b_neon;
    ctx->fn_diff = oapv_tbl_fn_diff_16b_neon;
    ctx->fn_txb = oapv_tbl_fn_txb_neon;
    ctx->fn_itx = oapv_tbl_fn_itx_neon;
    ctx->fn_quant = oapv_tbl_fn_quant_neon;
    ctx->fn_had8x8 = oapv_dc_removed_had8x8;
#endif
    return OAPV_OK;
}
1234
oapve_create(oapve_cdesc_t * cdesc,int * err)1235 oapve_t oapve_create(oapve_cdesc_t *cdesc, int *err)
1236 {
1237 oapve_ctx_t *ctx;
1238 int ret;
1239
1240 DUMP_CREATE(1);
1241 /* memory allocation for ctx and core structure */
1242 ctx = (oapve_ctx_t *)enc_ctx_alloc();
1243 if(ctx != NULL) {
1244 oapv_mcpy(&ctx->cdesc, cdesc, sizeof(oapve_cdesc_t));
1245 ret = enc_platform_init(ctx);
1246 oapv_assert_g(ret == OAPV_OK, ERR);
1247
1248 ret = enc_ready(ctx);
1249 oapv_assert_g(ret == OAPV_OK, ERR);
1250
1251 /* set default value for ctx */
1252 ctx->magic = OAPVE_MAGIC_CODE;
1253 ctx->id = (oapve_t)ctx;
1254 if(err) {
1255 *err = OAPV_OK;
1256 }
1257 return (ctx->id);
1258 }
1259 else {
1260 ret = OAPV_ERR;
1261 }
1262 ERR:
1263 if(ctx) {
1264 enc_ctx_free(ctx);
1265 }
1266 if(err) {
1267 *err = ret;
1268 }
1269 return NULL;
1270 }
1271
/*
 * Destroys an encoder instance: resolves the handle to its context, then
 * flushes the encoder resources with enc_flush() and frees the context.
 * Returns without doing anything when the handle does not resolve.
 */
void oapve_delete(oapve_t eid)
{
    oapve_ctx_t *ctx = enc_id_to_ctx(eid);

    oapv_assert_r(ctx);

    DUMP_DELETE();
    enc_flush(ctx);
    enc_ctx_free(ctx);
}
1283
/*
 * Encodes one access unit.
 *   eid   : encoder handle
 *   ifrms : input frames; frame i is encoded with ctx->cdesc.param[i]
 *   mid   : optional metadata container; each entry is written as a metadata
 *           PBU after the frames. Required when frame hashing is enabled.
 *   bitb  : output buffer (addr / bsize)
 *   stat  : receives per-frame sizes, frame info and the number of bytes written
 *   rfrms : optional reconstruction frames (may be NULL)
 *
 * Output layout: 32-bit AU size, 32-bit signature 'aPv1', then one PBU per
 * frame and per metadata entry. Each PBU size field is written as 0 first and
 * rewritten once the PBU is complete; the AU size is patched in last.
 *
 * Returns OAPV_OK on success. Missing arguments give
 * OAPV_ERR_INVALID_ARGUMENT, too many input frames OAPV_ERR_REACHED_MAX, and
 * a profile other than OAPV_PROFILE_422_10 OAPV_ERR_UNSUPPORTED. When a frame
 * fails after enc_frm_prepare() succeeded, the picture references taken for
 * that frame are released before returning.
 */
int oapve_encode(oapve_t eid, oapv_frms_t *ifrms, oapvm_t mid, oapv_bitb_t *bitb, oapve_stat_t *stat, oapv_frms_t *rfrms)
{
    oapve_ctx_t *ctx;
    oapv_frm_t  *frm;
    oapv_bs_t   *bs;
    oapv_bs_t    bs_pbu_beg;
    oapvm_ctx_t *md_list;
    u8          *bs_pos_au_beg;
    u8          *bs_pos_pbu_beg;
    u32          au_size;
    int          i, ret;

    ctx = enc_id_to_ctx(eid);
    oapv_assert_rv(ctx != NULL && ifrms != NULL && bitb != NULL && stat != NULL, OAPV_ERR_INVALID_ARGUMENT);
    oapv_assert_rv(bitb->addr && bitb->bsize > 0, OAPV_ERR_INVALID_ARGUMENT);
    // frames are indexed into fixed-size per-frame arrays below; the decoder
    // applies the same OAPV_MAX_NUM_FRAMES limit to its frame index
    oapv_assert_rv(ifrms->num_frms <= OAPV_MAX_NUM_FRAMES, OAPV_ERR_REACHED_MAX);

    bs = &ctx->bs;

    oapv_bsw_init(bs, bitb->addr, bitb->bsize, NULL);
    oapv_mset(stat, 0, sizeof(oapve_stat_t));

    bs_pos_au_beg = oapv_bsw_sink(bs); // address syntax of au size
    oapv_bsw_write(bs, 0, 32);         // placeholder for the AU size

    oapv_bsw_write(bs, 0x61507631, 32); // signature ('aPv1')

    for(i = 0; i < ifrms->num_frms; i++) {
        frm = &ifrms->frm[i];

        /* set default value for encoding parameter */
        ctx->param = &ctx->cdesc.param[i];
        ret = enc_read_param(ctx, ctx->param);
        oapv_assert_rv(ret == OAPV_OK, OAPV_ERR);

        oapv_assert_rv(ctx->param->profile_idc == OAPV_PROFILE_422_10, OAPV_ERR_UNSUPPORTED);

        // prepare for encoding a frame
        ret = enc_frm_prepare(ctx, frm->imgb, (rfrms != NULL) ? rfrms->frm[i].imgb : NULL);
        oapv_assert_rv(ret == OAPV_OK, ret);

        // From here on the context holds references to the frame's pictures:
        // failures go through ERR_FRM so that they are released.

        bs_pos_pbu_beg = oapv_bsw_sink(bs);            /* store pbu pos to calculate size */
        oapv_mcpy(&bs_pbu_beg, bs, sizeof(oapv_bs_t)); /* store pbu pos of ai to re-write */

        DUMP_SAVE(0);
        oapve_vlc_pbu_size(bs, 0);
        oapve_vlc_pbu_header(bs, frm->pbu_type, frm->group_id);
        // encode a frame
        ret = enc_frame(ctx);
        oapv_assert_g(ret == OAPV_OK, ERR_FRM);

        // rewrite pbu_size
        int pbu_size = ((u8 *)oapv_bsw_sink(bs)) - bs_pos_pbu_beg - 4;
        DUMP_SAVE(1);
        DUMP_LOAD(0);
        oapve_vlc_pbu_size(&bs_pbu_beg, pbu_size);
        DUMP_LOAD(1);

        stat->frm_size[i] = pbu_size + 4 /* PBU size length */;
        copy_fh_to_finfo(&ctx->fh, frm->pbu_type, frm->group_id, &stat->aui.frm_info[i]);

        // add frame hash value of reconstructed frame into metadata list
        // NOTE(review): ctx->rec is only set when a reconstruction frame was
        // passed in; confirm how oapv_set_md5_pld() behaves without one.
        if(ctx->use_frm_hash) {
            if(frm->pbu_type == OAPV_PBU_TYPE_PRIMARY_FRAME ||
               frm->pbu_type == OAPV_PBU_TYPE_NON_PRIMARY_FRAME) {
                oapv_assert_gv(mid != NULL, ret, OAPV_ERR_INVALID_ARGUMENT, ERR_FRM);
                ret = oapv_set_md5_pld(mid, frm->group_id, ctx->rec);
                oapv_assert_g(OAPV_SUCCEEDED(ret), ERR_FRM);
            }
        }

        // finishing of encoding a frame
        ret = enc_frm_finish(ctx, stat);
        oapv_assert_rv(ret == OAPV_OK, ret);
    }
    stat->aui.num_frms = ifrms->num_frms;

    // metadata PBUs follow the frame PBUs
    md_list = mid;
    if(md_list != NULL) {
        int num_md = md_list->num;
        for(i = 0; i < num_md; i++) {
            int group_id = md_list->group_ids[i];
            bs_pos_pbu_beg = oapv_bsw_sink(bs);            /* store pbu pos to calculate size */
            oapv_mcpy(&bs_pbu_beg, bs, sizeof(oapv_bs_t)); /* store pbu pos of ai to re-write */
            DUMP_SAVE(0);

            oapve_vlc_pbu_size(bs, 0);
            oapve_vlc_pbu_header(bs, OAPV_PBU_TYPE_METADATA, group_id);
            oapve_vlc_metadata(&md_list->md_arr[i], bs);

            // rewrite pbu_size
            int pbu_size = ((u8 *)oapv_bsw_sink(bs)) - bs_pos_pbu_beg - 4;
            DUMP_SAVE(1);
            DUMP_LOAD(0);
            oapve_vlc_pbu_size(&bs_pbu_beg, pbu_size);
            DUMP_LOAD(1);
        }
    }

    // patch the AU size (everything after the 4-byte size field itself)
    au_size = (u32)((u8 *)oapv_bsw_sink(bs) - bs_pos_au_beg) - 4;
    oapv_bsw_write_direct(bs_pos_au_beg, au_size, 32); /* u(32) */

    oapv_bsw_deinit(&ctx->bs); /* de-init BSW */
    stat->write = bsw_get_write_byte(&ctx->bs);

    return OAPV_OK;

ERR_FRM:
    /* drop the picture references taken by enc_frm_prepare() for the failed frame */
    enc_frm_finish(ctx, stat);
    return ret;
}
1386
/*
 * Gets or sets one encoder configuration value.
 *   eid  : encoder handle
 *   cfg  : one of the OAPV_CFG_SET_xxx / OAPV_CFG_GET_xxx selectors
 *   buf  : value to set, or destination of the value to get; every selector
 *          handled here uses a single int
 *   size : size of *buf in bytes; must be sizeof(int)
 * Returns OAPV_OK on success, OAPV_ERR_INVALID_ARGUMENT for a bad handle, a
 * missing or wrongly sized buffer or an out-of-range value, and
 * OAPV_ERR_UNSUPPORTED for an unknown selector.
 *
 * NOTE(review): most selectors access ctx->param, which this file assigns in
 * oapve_encode(); confirm it is also valid before the first encode call.
 */
int oapve_config(oapve_t eid, int cfg, void *buf, int *size)
{
    oapve_ctx_t *ctx;
    int          t0;
    int          is_int_arg;

    ctx = enc_id_to_ctx(eid);
    oapv_assert_rv(ctx, OAPV_ERR_INVALID_ARGUMENT);

    // Evaluated once so that a NULL buf/size is rejected instead of being
    // dereferenced; unknown selectors still report OAPV_ERR_UNSUPPORTED.
    is_int_arg = (buf != NULL && size != NULL && *size == sizeof(int));

    switch(cfg) {
    /* set config **********************************************************/
    case OAPV_CFG_SET_QP:
        oapv_assert_rv(is_int_arg, OAPV_ERR_INVALID_ARGUMENT);
        t0 = *((int *)buf);
        oapv_assert_rv(t0 >= MIN_QUANT && t0 <= MAX_QUANT(10),
                       OAPV_ERR_INVALID_ARGUMENT);
        ctx->param->qp = t0;
        break;
    case OAPV_CFG_SET_FPS_NUM:
        oapv_assert_rv(is_int_arg, OAPV_ERR_INVALID_ARGUMENT);
        t0 = *((int *)buf);
        oapv_assert_rv(t0 > 0, OAPV_ERR_INVALID_ARGUMENT);
        ctx->param->fps_num = t0;
        break;
    case OAPV_CFG_SET_FPS_DEN:
        oapv_assert_rv(is_int_arg, OAPV_ERR_INVALID_ARGUMENT);
        t0 = *((int *)buf);
        oapv_assert_rv(t0 > 0, OAPV_ERR_INVALID_ARGUMENT);
        ctx->param->fps_den = t0;
        break;
    case OAPV_CFG_SET_BPS:
        oapv_assert_rv(is_int_arg, OAPV_ERR_INVALID_ARGUMENT);
        t0 = *((int *)buf);
        oapv_assert_rv(t0 > 0, OAPV_ERR_INVALID_ARGUMENT);
        ctx->param->bitrate = t0;
        break;
    case OAPV_CFG_SET_USE_FRM_HASH:
        oapv_assert_rv(is_int_arg, OAPV_ERR_INVALID_ARGUMENT);
        ctx->use_frm_hash = (*((int *)buf)) ? 1 : 0;
        break;
    /* get config *******************************************************/
    case OAPV_CFG_GET_QP:
        oapv_assert_rv(is_int_arg, OAPV_ERR_INVALID_ARGUMENT);
        *((int *)buf) = ctx->param->qp;
        break;
    case OAPV_CFG_GET_WIDTH:
        oapv_assert_rv(is_int_arg, OAPV_ERR_INVALID_ARGUMENT);
        *((int *)buf) = ctx->param->w;
        break;
    case OAPV_CFG_GET_HEIGHT:
        oapv_assert_rv(is_int_arg, OAPV_ERR_INVALID_ARGUMENT);
        *((int *)buf) = ctx->param->h;
        break;
    case OAPV_CFG_GET_FPS_NUM:
        oapv_assert_rv(is_int_arg, OAPV_ERR_INVALID_ARGUMENT);
        *((int *)buf) = ctx->param->fps_num;
        break;
    case OAPV_CFG_GET_FPS_DEN:
        oapv_assert_rv(is_int_arg, OAPV_ERR_INVALID_ARGUMENT);
        *((int *)buf) = ctx->param->fps_den;
        break;
    case OAPV_CFG_GET_BPS:
        oapv_assert_rv(is_int_arg, OAPV_ERR_INVALID_ARGUMENT);
        *((int *)buf) = ctx->param->bitrate;
        break;
    default:
        oapv_trace("unknown config value (%d)\n", cfg);
        oapv_assert_rv(0, OAPV_ERR_UNSUPPORTED);
    }

    return OAPV_OK;
}
1458
/*
 * Fills `param` with the encoder's default settings: the structure is zeroed,
 * then preset, tile size (16x16 macroblocks), profile/level/band, color
 * description and a flat quantization matrix (all entries 16) are set.
 * Always returns OAPV_OK.
 */
int oapve_param_default(oapve_param_t *param)
{
    int comp, pos;

    // every field not mentioned below stays zero
    oapv_mset(param, 0, sizeof(oapve_param_t));

    // flat quantization matrix for every component
    for(comp = 0; comp < OAPV_MAX_CC; comp++) {
        for(pos = 0; pos < OAPV_BLK_D; pos++) {
            param->q_matrix[comp][pos] = 16;
        }
    }
    param->use_q_matrix = 0;

    param->preset = OAPV_PRESET_DEFAULT;

    // profile / level / band
    param->profile_idc = OAPV_PROFILE_422_10;
    param->level_idc = (int)((4.1 * 30.0) + 0.5);
    param->band_idc = 2;

    // tile size in macroblocks
    param->tile_w_mb = 16;
    param->tile_h_mb = 16;

    // no extra QP offset on any of the three offset fields
    param->qp_offset_c1 = 0;
    param->qp_offset_c2 = 0;
    param->qp_offset_c3 = 0;

    // color description
    param->color_description_present_flag = 0;
    param->color_primaries = 2;          // unspecified color primaries
    param->transfer_characteristics = 2; // unspecified transfer characteristics
    param->matrix_coefficients = 2;      // unspecified matrix coefficients
    param->full_range_flag = 0;          // limited range

    return OAPV_OK;
}
1491
1492 ///////////////////////////////////////////////////////////////////////////////
// end of encoder code
1494 #endif // ENABLE_ENCODER
1495 ///////////////////////////////////////////////////////////////////////////////
1496
1497 ///////////////////////////////////////////////////////////////////////////////
1498 // start of decoder code
1499 #if ENABLE_DECODER
1500 ///////////////////////////////////////////////////////////////////////////////
dec_id_to_ctx(oapvd_t id)1501 static oapvd_ctx_t *dec_id_to_ctx(oapvd_t id)
1502 {
1503 oapvd_ctx_t *ctx;
1504 oapv_assert_rv(id, NULL);
1505 ctx = (oapvd_ctx_t *)id;
1506 oapv_assert_rv(ctx->magic == OAPVD_MAGIC_CODE, NULL);
1507 return ctx;
1508 }
1509
dec_ctx_alloc(void)1510 static oapvd_ctx_t *dec_ctx_alloc(void)
1511 {
1512 oapvd_ctx_t *ctx;
1513
1514 ctx = (oapvd_ctx_t *)oapv_malloc_fast(sizeof(oapvd_ctx_t));
1515
1516 oapv_assert_rv(ctx != NULL, NULL);
1517 oapv_mset_x64a(ctx, 0, sizeof(oapvd_ctx_t));
1518
1519 return ctx;
1520 }
1521
/*
 * Returns a decoder context obtained from dec_ctx_alloc() to the allocator.
 * Only the context block itself is freed here.
 */
static void dec_ctx_free(oapvd_ctx_t *ctx)
{
    oapv_mfree_fast(ctx);
}
1526
dec_core_alloc(void)1527 static oapvd_core_t *dec_core_alloc(void)
1528 {
1529 oapvd_core_t *core;
1530
1531 core = (oapvd_core_t *)oapv_malloc_fast(sizeof(oapvd_core_t));
1532
1533 oapv_assert_rv(core, NULL);
1534 oapv_mset_x64a(core, 0, sizeof(oapvd_core_t));
1535
1536 return core;
1537 }
1538
/*
 * Returns a decoder core obtained from dec_core_alloc() to the allocator.
 */
static void dec_core_free(oapvd_core_t *core)
{
    oapv_mfree_fast(core);
}
1543
/*
 * Reconstructs one block of component `c` in place in core->coef:
 * the DC coefficient is rebuilt from core->dc_diff and the running predictor
 * core->prev_dc[c], then the block is dequantized with core->q_mat[c] /
 * core->dq_shift[c] and inverse transformed.
 * Always returns OAPV_OK.
 */
static int dec_block(oapvd_ctx_t *ctx, oapvd_core_t *core, int log2_w, int log2_h, int c)
{
    const int line = 1 << log2_w;

    // DC prediction: the reconstructed DC also becomes the next predictor
    core->coef[0] = core->dc_diff + core->prev_dc[c];
    core->prev_dc[c] = core->coef[0];

    // Inverse quantization
    ctx->fn_dquant[0](core->coef, core->q_mat[c], log2_w, log2_h, core->dq_shift[c]);

    // Inverse transform
    ctx->fn_itx[0](core->coef, ITX_SHIFT1, ITX_SHIFT2(ctx->bit_depth), line);

    return OAPV_OK;
}
1557
/*
 * Fills position and size of `num_tiles` tiles laid out in raster order on a
 * grid of `num_tile_cols` columns with a nominal tile size of tile_w x tile_h.
 * Tiles that would extend past the picture (w_pel x h_pel) are clipped to the
 * picture edge. Always returns OAPV_OK.
 */
static int dec_set_tile_info(oapvd_tile_t *tile, int w_pel, int h_pel, int tile_w, int tile_h, int num_tile_cols, int num_tiles)
{
    int idx;

    for(idx = 0; idx < num_tiles; idx++) {
        const int left = (idx % (num_tile_cols)) * tile_w;
        const int top = (idx / (num_tile_cols)) * tile_h;
        int       width = tile_w;
        int       height = tile_h;

        // clip the tiles of the last column / row to the picture
        if(left + tile_w > w_pel) {
            width = w_pel - left;
        }
        if(top + tile_h > h_pel) {
            height = h_pel - top;
        }

        tile[idx].x = left;
        tile[idx].y = top;
        tile[idx].w = width;
        tile[idx].h = height;
    }
    return OAPV_OK;
}
1572
/*
 * Per-frame decoder setup from the parsed frame header (ctx->fh) and the
 * output picture `imgb`.
 * Derives bit depth, chroma format, component shifts and the coded size
 * (frame size aligned to the macroblock size), computes the tile grid, selects
 * the block-to-image writers for the output layout and resets all tile states.
 * The first tile starts at the current read position of ctx->bs; the start of
 * every other tile is unknown at this point (NULL).
 *
 * The tile grid is validated before the reference on `imgb` is taken, so a
 * failure return leaves no reference behind.
 * Returns OAPV_OK, OAPV_ERR_INVALID_ARGUMENT for a NULL picture, or
 * OAPV_ERR_MALFORMED_BITSTREAM for a zero tile size or too many tiles.
 */
static int dec_frm_prepare(oapvd_ctx_t *ctx, oapv_imgb_t *imgb)
{
    int tile_w, tile_h;

    oapv_assert_rv(imgb != NULL, OAPV_ERR_INVALID_ARGUMENT);

    ctx->bit_depth = ctx->fh.fi.bit_depth;
    ctx->cfi = ctx->fh.fi.chroma_format_idc;
    ctx->num_comp = get_num_comp(ctx->cfi);
    ctx->comp_sft[Y_C][0] = 0;
    ctx->comp_sft[Y_C][1] = 0;

    // chroma shifts follow the color format of the output picture
    for(int c = 1; c < ctx->num_comp; c++) {
        ctx->comp_sft[c][0] = get_chroma_sft_w(color_format_to_chroma_format_idc(OAPV_CS_GET_FORMAT(imgb->cs)));
        ctx->comp_sft[c][1] = get_chroma_sft_h(color_format_to_chroma_format_idc(OAPV_CS_GET_FORMAT(imgb->cs)));
    }

    ctx->w = oapv_align_value(ctx->fh.fi.frame_width, OAPV_MB_W);
    ctx->h = oapv_align_value(ctx->fh.fi.frame_height, OAPV_MB_H);

    tile_w = ctx->fh.tile_width_in_mbs * OAPV_MB_W;
    tile_h = ctx->fh.tile_height_in_mbs * OAPV_MB_H;

    // the tile size comes from the bitstream: a zero size would divide by zero below
    oapv_assert_rv(tile_w > 0 && tile_h > 0, OAPV_ERR_MALFORMED_BITSTREAM);

    ctx->num_tile_cols = (ctx->w + (tile_w - 1)) / tile_w;
    ctx->num_tile_rows = (ctx->h + (tile_h - 1)) / tile_h;
    ctx->num_tiles = ctx->num_tile_cols * ctx->num_tile_rows;

    oapv_assert_rv((ctx->num_tile_cols <= OAPV_MAX_TILE_COLS) && (ctx->num_tile_rows <= OAPV_MAX_TILE_ROWS), OAPV_ERR_MALFORMED_BITSTREAM);

    // nothing can fail from here on: take the reference on the output picture
    ctx->imgb = imgb;
    imgb_addref(ctx->imgb); // increase reference count

    if(OAPV_CS_GET_FORMAT(imgb->cs) == OAPV_CF_PLANAR2) {
        ctx->fn_block_to_imgb[Y_C] = block_to_imgb_p210_y;
        ctx->fn_block_to_imgb[U_C] = block_to_imgb_p210_uv;
        ctx->fn_block_to_imgb[V_C] = block_to_imgb_p210_uv;
    }
    else {
        for(int c = 0; c < ctx->num_comp; c++) {
            ctx->fn_block_to_imgb[c] = block_to_imgb_10bit;
        }
    }

    dec_set_tile_info(ctx->tile, ctx->w, ctx->h, tile_w, tile_h, ctx->num_tile_cols, ctx->num_tiles);

    for(int i = 0; i < ctx->num_tiles; i++) {
        ctx->tile[i].bs_beg = NULL;
        ctx->tile[i].stat = DEC_TILE_STAT_NOT_DECODED;
    }
    // only the first tile's start is known before any tile size has been read
    ctx->tile[0].bs_beg = oapv_bsr_sink(&ctx->bs);

    return OAPV_OK;
}
1624
/*
 * Per-frame decoder teardown: drops the reference held on the output picture
 * and wipes the bitstream reader state. Always returns OAPV_OK.
 */
static int dec_frm_finish(oapvd_ctx_t *ctx)
{
    // decrease the reference count taken for this frame
    imgb_release(ctx->imgb);
    ctx->imgb = NULL;

    // clean the reader state
    oapv_mset(&ctx->bs, 0, sizeof(ctx->bs));

    return OAPV_OK;
}
1632
/*
 * Decodes all blocks of component `c` inside `tile`.
 *   bs    : reader positioned at the component's coefficient data
 *   dst   : base address of the destination plane
 *   s_dst : stride of the destination plane in bytes
 * Macroblocks are visited in raster order inside the tile and blocks in raster
 * order inside each macroblock. For each block the DC and AC coefficients are
 * parsed, the block is reconstructed with dec_block() and written to the
 * plane through ctx->fn_block_to_imgb[c]. The reader is byte aligned at the end.
 * Returns OAPV_OK, or the first failing status.
 */
static int dec_tile_comp(oapvd_tile_t *tile, oapvd_ctx_t *ctx, oapvd_core_t *core, oapv_bs_t *bs, int c, int s_dst, void *dst)
{
    const int sft_w = ctx->comp_sft[c][0];
    const int sft_h = ctx->comp_sft[c][1];

    // macroblock size of this component
    const int mb_w = OAPV_MB_W >> sft_w;
    const int mb_h = OAPV_MB_H >> sft_h;

    // tile rectangle in samples of this component (end values are exclusive)
    const int x_beg = tile->x >> sft_w;
    const int x_end = (tile->w >> sft_w) + x_beg;
    const int y_beg = tile->y >> sft_h;
    const int y_end = (tile->h >> sft_h) + y_beg;

    int ret;

    for(int mb_y = y_beg; mb_y < y_end; mb_y += mb_h) {
        for(int mb_x = x_beg; mb_x < x_end; mb_x += mb_w) {
            for(int blk_y = mb_y; blk_y < (mb_y + mb_h); blk_y += OAPV_BLK_H) {
                // start of the block row inside the destination plane
                u8 *row = (u8 *)dst + blk_y * s_dst;

                for(int blk_x = mb_x; blk_x < (mb_x + mb_w); blk_x += OAPV_BLK_W) {
                    s16 *out;

                    // parse DC coefficient
                    ret = oapvd_vlc_dc_coeff(ctx, core, bs, &core->dc_diff, c);
                    oapv_assert_rv(OAPV_SUCCEEDED(ret), ret);

                    // parse AC coefficient
                    ret = oapvd_vlc_ac_coeff(ctx, core, bs, core->coef, c);
                    oapv_assert_rv(OAPV_SUCCEEDED(ret), ret);
                    DUMP_COEF(core->coef, OAPV_BLK_D, blk_x, blk_y, c);

                    // decode a block
                    ret = dec_block(ctx, core, OAPV_LOG2_BLK_W, OAPV_LOG2_BLK_H, c);
                    oapv_assert_rv(OAPV_SUCCEEDED(ret), ret);

                    // copy decoded block to image buffer
                    out = (s16 *)row + blk_x;
                    ctx->fn_block_to_imgb[c](core->coef, OAPV_BLK_W, OAPV_BLK_H, (OAPV_BLK_W << 1), blk_x, s_dst, out);
                }
            }
        }
    }

    /* byte align */
    oapv_bsr_align8(bs);
    return OAPV_OK;
}
1677
/*
 * Decodes one tile with the given core.
 * A reader is opened on the tile payload (tile->data_size bytes located
 * OAPV_TILE_SIZE_LEN bytes after tile->bs_beg), the tile header is parsed, the
 * per-component dequantization state of the core is set up from the tile QPs
 * and the frame header's quantization matrix, and then every component is
 * decoded into the output picture with dec_tile_comp().
 * Returns OAPV_OK, or the first failing status.
 */
static int dec_tile(oapvd_core_t *core, oapvd_tile_t *tile)
{
    oapvd_ctx_t *ctx = core->ctx;
    oapv_bs_t    bs;
    int          ret, c;

    oapv_bsr_init(&bs, tile->bs_beg + OAPV_TILE_SIZE_LEN, tile->data_size, NULL);
    ret = oapvd_vlc_tile_header(&bs, ctx, &tile->th);
    oapv_assert_rv(OAPV_SUCCEEDED(ret), ret);

    // per-component dequantization setup and predictor reset
    for(c = 0; c < ctx->num_comp; c++) {
        int scale;

        core->qp[c] = tile->th.tile_qp[c];
        scale = oapv_tbl_dq_scale[core->qp[c] % 6];
        core->dq_shift[c] = ctx->bit_depth - 2 - (core->qp[c] / 6);

        core->prev_dc[c] = 0;
        core->prev_dc_ctx[c] = 20;
        core->prev_1st_ac_ctx[c] = 0;

        // scaled quantization matrix, stored row by row
        for(int row = 0; row < OAPV_BLK_H; row++) {
            for(int col = 0; col < OAPV_BLK_W; col++) {
                core->q_mat[c][row * OAPV_BLK_W + col] = scale * ctx->fh.q_matrix[c][row][col]; // 7bit + 8bit
            }
        }
    }

    for(c = 0; c < ctx->num_comp; c++) {
        s16 *plane;
        int  stride;

        if(OAPV_CS_GET_FORMAT(ctx->imgb->cs) == OAPV_CF_PLANAR2) {
            // two planes: luma in plane 0, both chroma components in plane 1,
            // with the third component starting one sample after the second
            const int pidx = (c > 0) ? 1 : 0;

            plane = ctx->imgb->a[pidx];
            if(c > 1) {
                plane += 1;
            }
            stride = ctx->imgb->s[pidx];
        }
        else {
            // one plane per component
            plane = ctx->imgb->a[c];
            stride = ctx->imgb->s[c];
        }

        ret = dec_tile_comp(tile, ctx, core, &bs, c, stride, plane);
        oapv_assert_rv(OAPV_SUCCEEDED(ret), ret);
    }

    oapvd_vlc_tile_dummy_data(&bs);
    return OAPV_OK;
}
1726
dec_thread_tile(void * arg)1727 static int dec_thread_tile(void *arg)
1728 {
1729 oapv_bs_t bs;
1730 int i, ret, run, tile_idx = 0, thread_ret = OAPV_OK;
1731
1732 oapvd_core_t *core = (oapvd_core_t *)arg;
1733 oapvd_ctx_t *ctx = core->ctx;
1734 oapvd_tile_t *tile = ctx->tile;
1735
1736 while(1) {
1737 // find not decoded tile
1738 oapv_tpool_enter_cs(ctx->sync_obj);
1739 for(i = 0; i < ctx->num_tiles; i++) {
1740 if(tile[i].stat == DEC_TILE_STAT_NOT_DECODED) {
1741 tile[i].stat = DEC_TILE_STAT_ON_DECODING;
1742 tile_idx = i;
1743 break;
1744 }
1745 }
1746 oapv_tpool_leave_cs(ctx->sync_obj);
1747 if(i == ctx->num_tiles) {
1748 break;
1749 }
1750
1751 // wait until to know bistream start position
1752 run = 1;
1753 while(run) {
1754 oapv_tpool_enter_cs(ctx->sync_obj);
1755 if(tile[tile_idx].bs_beg != NULL) {
1756 run = 0;
1757 }
1758 oapv_tpool_leave_cs(ctx->sync_obj);
1759 }
1760 /* read tile size */
1761 oapv_bsr_init(&bs, tile[tile_idx].bs_beg, OAPV_TILE_SIZE_LEN, NULL);
1762 ret = oapvd_vlc_tile_size(&bs, &tile[tile_idx].data_size);
1763 oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);
1764 oapv_assert_g(tile[tile_idx].bs_beg + OAPV_TILE_SIZE_LEN + (tile[tile_idx].data_size - 1) <= ctx->bs.end, ERR);
1765
1766 oapv_tpool_enter_cs(ctx->sync_obj);
1767 if(tile_idx + 1 < ctx->num_tiles) {
1768 tile[tile_idx + 1].bs_beg = tile[tile_idx].bs_beg + OAPV_TILE_SIZE_LEN + tile[tile_idx].data_size;
1769 }
1770 else {
1771 ctx->tile_end = tile[tile_idx].bs_beg + OAPV_TILE_SIZE_LEN + tile[tile_idx].data_size;
1772 }
1773 oapv_tpool_leave_cs(ctx->sync_obj);
1774
1775 ret = dec_tile(core, &tile[tile_idx]);
1776
1777 oapv_tpool_enter_cs(ctx->sync_obj);
1778 if (OAPV_SUCCEEDED(ret)) {
1779 tile[tile_idx].stat = DEC_TILE_STAT_DECODED;
1780 }
1781 else {
1782 tile[tile_idx].stat = ret;
1783 thread_ret = ret;
1784 }
1785 tile[tile_idx].stat = OAPV_SUCCEEDED(ret) ? DEC_TILE_STAT_DECODED : ret;
1786 oapv_tpool_leave_cs(ctx->sync_obj);
1787 }
1788 return thread_ret;
1789
1790 ERR:
1791 oapv_tpool_enter_cs(ctx->sync_obj);
1792 tile[tile_idx].stat = DEC_TILE_STAT_SIZE_ERROR;
1793 if (tile_idx + 1 < ctx->num_tiles)
1794 {
1795 tile[tile_idx + 1].bs_beg = tile[tile_idx].bs_beg;
1796 }
1797 oapv_tpool_leave_cs(ctx->sync_obj);
1798 return OAPV_ERR_MALFORMED_BITSTREAM;
1799 }
1800
/*
 * Releases everything dec_ready() set up: the worker threads and the thread
 * pool (only present when two or more threads are configured), the context
 * synchronization object and the per-thread cores.
 *
 * dec_ready() also calls this after a partial setup, so every resource is
 * released only if it actually exists, and pointers are cleared afterwards.
 */
static void dec_flush(oapvd_ctx_t *ctx)
{
    if(ctx->cdesc.threads >= 2 && ctx->tpool) {
        // thread controller instance is present: terminate the created threads
        for(int i = 0; i < ctx->cdesc.threads - 1; i++) {
            if(ctx->thread_id[i]) {
                // valid thread instance
                ctx->tpool->release(&ctx->thread_id[i]);
            }
        }
        // deinitialize the tpool
        oapv_tpool_deinit(ctx->tpool);
        oapv_mfree(ctx->tpool);
        ctx->tpool = NULL;
    }

    // absent when setup failed before the object was created
    if(ctx->sync_obj != NULL) {
        oapv_tpool_sync_obj_delete(&(ctx->sync_obj));
        ctx->sync_obj = NULL;
    }

    for(int i = 0; i < ctx->cdesc.threads; i++) {
        // cores past a failed allocation were never created
        if(ctx->core[i] != NULL) {
            dec_core_free(ctx->core[i]);
            ctx->core[i] = NULL;
        }
    }
}
1826
/*
 * Allocates the run-time resources of a decoder context: one core per
 * configured thread, the context synchronization object and, when two or more
 * threads are configured, a thread pool with (threads - 1) worker threads (the
 * calling thread acts as the remaining one).
 * Returns OAPV_OK on success. On failure everything allocated so far is
 * released with dec_flush() and OAPV_ERR_OUT_OF_MEMORY (core or thread pool
 * allocation) or OAPV_ERR_UNKNOWN (synchronization object or thread creation)
 * is returned.
 */
static int dec_ready(oapvd_ctx_t *ctx)
{
    int i, ret = OAPV_OK;

    if(ctx->core[0] == NULL) {
        // create cores
        for(i = 0; i < ctx->cdesc.threads; i++) {
            ctx->core[i] = dec_core_alloc();
            oapv_assert_gv(ctx->core[i], ret, OAPV_ERR_OUT_OF_MEMORY, ERR);
            ctx->core[i]->ctx = ctx;
        }
    }

    // initialize the threads to NULL
    for(i = 0; i < OAPV_MAX_THREADS; i++) {
        ctx->thread_id[i] = 0;
    }

    // get the context synchronization handle
    ctx->sync_obj = oapv_tpool_sync_obj_create();
    oapv_assert_gv(ctx->sync_obj != NULL, ret, OAPV_ERR_UNKNOWN, ERR);

    if(ctx->cdesc.threads >= 2) {
        ctx->tpool = oapv_malloc(sizeof(oapv_tpool_t));
        // the pool must exist before it is initialised
        oapv_assert_gv(ctx->tpool != NULL, ret, OAPV_ERR_OUT_OF_MEMORY, ERR);
        oapv_tpool_init(ctx->tpool, ctx->cdesc.threads - 1);
        for(i = 0; i < ctx->cdesc.threads - 1; i++) {
            ctx->thread_id[i] = ctx->tpool->create(ctx->tpool, i);
            oapv_assert_gv(ctx->thread_id[i] != NULL, ret, OAPV_ERR_UNKNOWN, ERR);
        }
    }
    return OAPV_OK;

ERR:
    dec_flush(ctx);

    return ret;
}
1864
/*
 * Installs the decoder's dequantization and inverse-transform function tables.
 * The portable C tables are installed first. With X86_SSE they are replaced by
 * the AVX2 tables when bit 2 of oapv_check_cpu_info_x86() is set (an SSE-only
 * CPU keeps the portable tables); with ARM_NEON the inverse transform uses the
 * NEON table. Always returns OAPV_OK.
 */
static int dec_platform_init(oapvd_ctx_t *ctx)
{
    // portable tables
    ctx->fn_dquant = oapv_tbl_fn_dquant;
    ctx->fn_itx = oapv_tbl_fn_itx;

#if X86_SSE
    const int cpu_info = oapv_check_cpu_info_x86();
    const int has_avx2 = (cpu_info >> 2) & 1;

    if(has_avx2) {
        ctx->fn_dquant = oapv_tbl_fn_dquant_avx;
        ctx->fn_itx = oapv_tbl_fn_itx_avx;
    }
#elif ARM_NEON
    ctx->fn_itx = oapv_tbl_fn_itx_neon;
#endif
    return OAPV_OK;
}
1892
oapvd_create(oapvd_cdesc_t * cdesc,int * err)1893 oapvd_t oapvd_create(oapvd_cdesc_t *cdesc, int *err)
1894 {
1895 oapvd_ctx_t *ctx;
1896 int ret;
1897
1898 DUMP_CREATE(0);
1899 ctx = NULL;
1900
1901 /* check if any decoder argument is correctly set */
1902 oapv_assert_gv(cdesc->threads > 0 && cdesc->threads <= OAPV_MAX_THREADS, ret, OAPV_ERR_INVALID_ARGUMENT, ERR);
1903
1904 /* memory allocation for ctx and core structure */
1905 ctx = (oapvd_ctx_t *)dec_ctx_alloc();
1906 oapv_assert_gv(ctx != NULL, ret, OAPV_ERR_OUT_OF_MEMORY, ERR);
1907 oapv_mcpy(&ctx->cdesc, cdesc, sizeof(oapvd_cdesc_t));
1908
1909 /* initialize platform-specific variables */
1910 ret = dec_platform_init(ctx);
1911 oapv_assert_g(ret == OAPV_OK, ERR);
1912
1913 /* ready for decoding */
1914 ret = dec_ready(ctx);
1915 oapv_assert_g(ret == OAPV_OK, ERR);
1916
1917 ctx->magic = OAPVD_MAGIC_CODE;
1918 ctx->id = (oapvd_t)ctx;
1919 if(err) {
1920 *err = OAPV_OK;
1921 }
1922 return (ctx->id);
1923
1924 ERR:
1925 if(ctx) {
1926 dec_ctx_free(ctx);
1927 }
1928 if(err) {
1929 *err = ret;
1930 }
1931 return NULL;
1932 }
1933
/*
 * Destroys a decoder instance: resolves the handle to its context, releases
 * the decoder resources with dec_flush() and frees the context.
 * Returns without doing anything when the handle does not resolve.
 */
void oapvd_delete(oapvd_t did)
{
    oapvd_ctx_t *ctx = dec_id_to_ctx(did);

    oapv_assert_r(ctx);

    DUMP_DELETE();
    dec_flush(ctx);
    dec_ctx_free(ctx);
}
1944
/*
 * Decode one access unit (AU).
 *
 * The buffer must start with the 4-byte signature 'aPv1', followed by a
 * series of PBUs. Each PBU is a 4-byte pbu_size field followed by pbu_size
 * bytes that begin with the 4-byte PBU header.
 *
 * did   : decoder id.
 * bitb  : input bitstream; 'addr' and 'ssize' are read.
 * ofrms : output frames; frm[i].imgb is handed to dec_frm_prepare() for the
 *         i-th frame PBU, and frm[i].pbu_type / frm[i].group_id are filled in.
 *         ofrms->num_frms must equal the number of frame PBUs found.
 * mid   : metadata handle, forwarded to oapvd_vlc_metadata() for metadata PBUs.
 * stat  : decoding statistics; 'read' is incremented (not reset), while
 *         'aui' and 'frm_size' are filled per decoded frame.
 *
 * Returns OAPV_OK (or another success code from the last step) on success,
 * otherwise an error code:
 *  - OAPV_ERR_INVALID_ARGUMENT for an unknown id or a NULL argument,
 *  - OAPV_ERR_MALFORMED_BITSTREAM for a bad signature, inconsistent PBU
 *    sizes, or a frame count different from ofrms->num_frms,
 *  - OAPV_ERR_REACHED_MAX for more than OAPV_MAX_NUM_FRAMES frame PBUs,
 *  - the first failure reported by a parsing step or by tile decoding.
 */
int oapvd_decode(oapvd_t did, oapv_bitb_t *bitb, oapv_frms_t *ofrms, oapvm_t mid, oapvd_stat_t *stat)
{
    oapvd_ctx_t *ctx;
    oapv_bs_t   *bs;
    oapv_pbuh_t  pbuh;
    int          ret = OAPV_OK;
    u32          pbu_size;
    u32          cur_read_size = 0;
    int          frame_cnt = 0;

    ctx = dec_id_to_ctx(did);
    oapv_assert_rv(ctx, OAPV_ERR_INVALID_ARGUMENT);

    /* these pointers are dereferenced unconditionally below */
    oapv_assert_rv(bitb != NULL && ofrms != NULL && stat != NULL, OAPV_ERR_INVALID_ARGUMENT);
    oapv_assert_rv(bitb->addr != NULL, OAPV_ERR_INVALID_ARGUMENT);

    // read signature ('aPv1')
    oapv_assert_rv(bitb->ssize > 4, OAPV_ERR_MALFORMED_BITSTREAM);
    u32 signature = oapv_bsr_read_direct(bitb->addr, 32);
    oapv_assert_rv(signature == 0x61507631, OAPV_ERR_MALFORMED_BITSTREAM);
    cur_read_size += 4;
    stat->read += 4;

    do {
        u32 remain = bitb->ssize - cur_read_size;
        /* need at least pbu_size (4 byte) + PBU header (4 byte) */
        oapv_assert_gv((remain >= 8), ret, OAPV_ERR_MALFORMED_BITSTREAM, ERR);
        oapv_bsr_init(&ctx->bs, (u8 *)bitb->addr + cur_read_size, remain, NULL);
        bs = &ctx->bs;

        ret = oapvd_vlc_pbu_size(bs, &pbu_size); // read pbu_size (4 byte)
        oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);
        remain -= 4; // size of pbu_size syntax
        oapv_assert_gv(pbu_size <= remain, ret, OAPV_ERR_MALFORMED_BITSTREAM, ERR);
        /* a PBU always carries its 4-byte header; a smaller size would make
         * 'pbu_size - 4' below wrap around to a huge unsigned value */
        oapv_assert_gv(pbu_size >= 4, ret, OAPV_ERR_MALFORMED_BITSTREAM, ERR);

        ret = oapvd_vlc_pbu_header(bs, &pbuh);
        oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);

        if(pbuh.pbu_type == OAPV_PBU_TYPE_PRIMARY_FRAME ||
           pbuh.pbu_type == OAPV_PBU_TYPE_NON_PRIMARY_FRAME ||
           pbuh.pbu_type == OAPV_PBU_TYPE_PREVIEW_FRAME ||
           pbuh.pbu_type == OAPV_PBU_TYPE_DEPTH_FRAME ||
           pbuh.pbu_type == OAPV_PBU_TYPE_ALPHA_FRAME) {

            oapv_assert_gv(frame_cnt < OAPV_MAX_NUM_FRAMES, ret, OAPV_ERR_REACHED_MAX, ERR);

            ret = oapvd_vlc_frame_header(bs, &ctx->fh);
            oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);

            ret = dec_frm_prepare(ctx, ofrms->frm[frame_cnt].imgb);
            oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);

            oapv_tpool_t *tpool = ctx->tpool;
            int           res;
            int           tidx;
            int           tile_ret; /* outcome of tile decoding, kept apart from 'ret' */
            /* never use more workers than there are tiles */
            int           parallel_task = (ctx->cdesc.threads > ctx->num_tiles) ? ctx->num_tiles : ctx->cdesc.threads;

            /* decode tiles ************************************/
            /* (parallel_task - 1) pool threads plus the calling thread */
            for(tidx = 0; tidx < (parallel_task - 1); tidx++) {
                tpool->run(ctx->thread_id[tidx], dec_thread_tile,
                           (void *)ctx->core[tidx]);
            }
            tile_ret = dec_thread_tile((void *)ctx->core[tidx]);
            for(tidx = 0; tidx < parallel_task - 1; tidx++) {
                tpool->join(ctx->thread_id[tidx], &res);
                if(OAPV_FAILED(res)) {
                    tile_ret = res;
                }
            }
            /****************************************************/

            /* READ FILLER HERE !!! */

            oapv_bsr_move(&ctx->bs, ctx->tile_end);
            stat->read += BSR_GET_READ_BYTE(&ctx->bs);

            copy_fh_to_finfo(&ctx->fh, pbuh.pbu_type, pbuh.group_id, &stat->aui.frm_info[frame_cnt]);
            if(tile_ret == OAPV_OK && ctx->use_frm_hash) {
                oapv_imgb_set_md5(ctx->imgb);
            }

            /* Always finish the frame, even after a tile error, so that the
             * per-frame state set up by dec_frm_prepare() is wound down. */
            ret = dec_frm_finish(ctx);
            oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);

            /* Report a tile decoding failure instead of letting the result
             * of dec_frm_finish() overwrite it. */
            if(OAPV_FAILED(tile_ret)) {
                ret = tile_ret;
                goto ERR;
            }

            ofrms->frm[frame_cnt].pbu_type = pbuh.pbu_type;
            ofrms->frm[frame_cnt].group_id = pbuh.group_id;
            stat->frm_size[frame_cnt] = pbu_size + 4 /* PBU size length */;
            frame_cnt++;
        }
        else if(pbuh.pbu_type == OAPV_PBU_TYPE_METADATA) {
            ret = oapvd_vlc_metadata(bs, pbu_size, mid, pbuh.group_id);
            oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);

            stat->read += BSR_GET_READ_BYTE(&ctx->bs);
        }
        else if(pbuh.pbu_type == OAPV_PBU_TYPE_FILLER) {
            /* payload size = PBU size minus the 4-byte PBU header */
            ret = oapvd_vlc_filler(bs, (pbu_size - 4));
            oapv_assert_g(OAPV_SUCCEEDED(ret), ERR);
        }
        /* other PBU types are skipped */
        cur_read_size += pbu_size + 4; /* 4 byte is for pbu_size syntax itself */
    } while(cur_read_size < bitb->ssize);

    stat->aui.num_frms = frame_cnt;
    /* the caller must have announced exactly as many frames as were found */
    oapv_assert_gv(ofrms->num_frms == frame_cnt, ret, OAPV_ERR_MALFORMED_BITSTREAM, ERR);

ERR:
    /* success and failure share this exit: 'ret' holds the final status */
    return ret;
}
2050
/*
 * Set a decoder configuration value.
 *
 * did  : decoder id.
 * cfg  : configuration selector. Only OAPV_CFG_SET_USE_FRM_HASH is handled.
 * buf  : pointer to the configuration value. For OAPV_CFG_SET_USE_FRM_HASH
 *        it is read as an int; any non-zero value enables the frame hash.
 * size : not used by the configurations handled here.
 *
 * Returns OAPV_OK on success, OAPV_ERR_INVALID_ARGUMENT for an unknown id or
 * a NULL 'buf', and OAPV_ERR_UNSUPPORTED for any other 'cfg' value.
 */
int oapvd_config(oapvd_t did, int cfg, void *buf, int *size)
{
    oapvd_ctx_t *ctx;

    ctx = dec_id_to_ctx(did);
    oapv_assert_rv(ctx, OAPV_ERR_INVALID_ARGUMENT);

    switch(cfg) {
    /* set config ************************************************************/
    case OAPV_CFG_SET_USE_FRM_HASH:
        /* the value is read through 'buf', so it must be present */
        oapv_assert_rv(buf != NULL, OAPV_ERR_INVALID_ARGUMENT);
        ctx->use_frm_hash = (*((int *)buf)) ? 1 : 0; /* normalize to 0/1 */
        break;

    default:
        oapv_assert_rv(0, OAPV_ERR_UNSUPPORTED);
    }
    return OAPV_OK;
}
2069
/*
 * Extract frame information from an access unit (AU) without decoding it.
 *
 * The buffer must start with the 4-byte signature 'aPv1', followed by a
 * series of PBUs. If an access-unit-info PBU is found, its content is copied
 * to 'aui' and parsing stops there. Otherwise the frame info of every frame
 * PBU is collected, up to OAPV_MAX_NUM_FRAMES entries.
 *
 * au      : address of the AU data.
 * au_size : size of the AU data in bytes.
 * aui     : output; 'num_frms' and 'frm_info[]' are filled in.
 *
 * Returns OAPV_OK on success, OAPV_ERR_INVALID_ARGUMENT for a NULL argument,
 * OAPV_ERR_MALFORMED_BITSTREAM for a bad signature or inconsistent PBU sizes,
 * OAPV_ERR_REACHED_MAX when there are too many frames, or the error reported
 * by a parsing step.
 *
 * NOTE(review): DUMP_SET(0)/DUMP_SET(1) look like a disable/re-enable pair
 * for dumping while the AU is only inspected - confirm against the macro.
 * Every exit after DUMP_SET(0) goes through END so the two calls stay paired.
 */
int oapvd_info(void *au, int au_size, oapv_au_info_t *aui)
{
    int ret = OAPV_OK;
    int frm_count = 0;
    u32 cur_read_size = 0;
    u32 signature;

    /* both pointers are dereferenced below */
    oapv_assert_rv(au != NULL && aui != NULL, OAPV_ERR_INVALID_ARGUMENT);

    DUMP_SET(0);

    // read signature ('aPv1')
    oapv_assert_gv(au_size > 4, ret, OAPV_ERR_MALFORMED_BITSTREAM, END);
    signature = oapv_bsr_read_direct(au, 32);
    oapv_assert_gv(signature == 0x61507631, ret, OAPV_ERR_MALFORMED_BITSTREAM, END);
    cur_read_size += 4;

    /* 'au' address contains series of PBU */
    do {
        oapv_bs_t   bs;
        oapv_pbuh_t pbuh;
        u32         pbu_size = 0;
        u32         remain = au_size - cur_read_size;

        /* need at least pbu_size (4 byte) + PBU header (4 byte) */
        oapv_assert_gv((remain >= 8), ret, OAPV_ERR_MALFORMED_BITSTREAM, END);
        oapv_bsr_init(&bs, (u8 *)au + cur_read_size, remain, NULL);

        ret = oapvd_vlc_pbu_size(&bs, &pbu_size); // read pbu_size (4 byte)
        oapv_assert_g(OAPV_SUCCEEDED(ret), END);
        remain -= 4; // size of pbu_size syntax
        oapv_assert_gv(pbu_size <= remain, ret, OAPV_ERR_MALFORMED_BITSTREAM, END);
        /* a PBU cannot be smaller than its own 4-byte header */
        oapv_assert_gv(pbu_size >= 4, ret, OAPV_ERR_MALFORMED_BITSTREAM, END);

        /* pbu header */
        ret = oapvd_vlc_pbu_header(&bs, &pbuh); // read pbu_header() (4 byte)
        oapv_assert_gv(OAPV_SUCCEEDED(ret), ret, OAPV_ERR_MALFORMED_BITSTREAM, END);

        if(pbuh.pbu_type == OAPV_PBU_TYPE_AU_INFO) {
            // parse access_unit_info in PBU
            oapv_aui_t ai;

            ret = oapvd_vlc_au_info(&bs, &ai);
            oapv_assert_g(OAPV_SUCCEEDED(ret), END);

            /* 'num_frames' indexes aui->frm_info[] below; apply the same
             * limit that guards the per-frame path */
            oapv_assert_gv(ai.num_frames <= OAPV_MAX_NUM_FRAMES, ret, OAPV_ERR_REACHED_MAX, END);

            aui->num_frms = ai.num_frames;
            for(int i = 0; i < ai.num_frames; i++) {
                copy_fi_to_finfo(&ai.frame_info[i], ai.pbu_type[i], ai.group_id[i], &aui->frm_info[i]);
            }
            ret = OAPV_OK; // found access_unit_info, no need to read more PBUs
            goto END;
        }
        if(pbuh.pbu_type == OAPV_PBU_TYPE_PRIMARY_FRAME ||
           pbuh.pbu_type == OAPV_PBU_TYPE_NON_PRIMARY_FRAME ||
           pbuh.pbu_type == OAPV_PBU_TYPE_PREVIEW_FRAME ||
           pbuh.pbu_type == OAPV_PBU_TYPE_DEPTH_FRAME ||
           pbuh.pbu_type == OAPV_PBU_TYPE_ALPHA_FRAME) {
            // parse frame_info in PBU
            oapv_fi_t fi;

            oapv_assert_gv(frm_count < OAPV_MAX_NUM_FRAMES, ret, OAPV_ERR_REACHED_MAX, END);
            ret = oapvd_vlc_frame_info(&bs, &fi);
            oapv_assert_g(OAPV_SUCCEEDED(ret), END);

            copy_fi_to_finfo(&fi, pbuh.pbu_type, pbuh.group_id, &aui->frm_info[frm_count]);
            frm_count++;
        }
        aui->num_frms = frm_count;
        cur_read_size += pbu_size + 4; /* 4byte is for pbu_size syntax itself */
    } while(cur_read_size < au_size);

    ret = OAPV_OK;

END:
    /* single exit point so the DUMP_SET(0) above is always undone */
    DUMP_SET(1);
    return ret;
}
2134
2135 ///////////////////////////////////////////////////////////////////////////////
2136 // end of decoder code
2137 #endif // ENABLE_DECODER
2138 ///////////////////////////////////////////////////////////////////////////////