1 /*
2 * Copyright (c) 2018 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
14 * of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24 #define LIBVA_UTILS_UPLOAD_DOWNLOAD_YUV_SURFACE 1
25
26 #include <stdio.h>
27 #include <string.h>
28 #include <stdlib.h>
29 #include <getopt.h>
30 #include <unistd.h>
31 #include <sys/types.h>
32 #include <sys/stat.h>
33 #include <sys/time.h>
34 #include <sys/mman.h>
35 #include <fcntl.h>
36 #include <assert.h>
37 #include <pthread.h>
38 #include <errno.h>
39 #include <math.h>
40 #include <va/va.h>
41 #include <va/va_enc_hevc.h>
42 #include "va_display.h"
/* Round x up to the next multiple of 16. The argument is parenthesized so
 * that expressions such as ALIGN16(a << 1) or ALIGN16(c ? a : b) expand
 * correctly. */
#define ALIGN16(x) (((x) + 15) & ~15)
/*
 * Terminate the program if va_status is not VA_STATUS_SUCCESS.
 *
 * va_status: status value to test.
 * func:      string naming the call that produced it (printed on failure).
 *
 * On failure the calling function's name, func and the line number are
 * written to stderr and the process exits with status 1.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * (safe in an unbraced if/else); invoke it with a trailing semicolon.
 */
#define CHECK_VASTATUS(va_status, func)                                        \
    do {                                                                       \
        if ((va_status) != VA_STATUS_SUCCESS) {                                \
            fprintf(stderr, "%s:%s (%d) failed,exit\n", __func__, func, __LINE__); \
            exit(1);                                                           \
        }                                                                      \
    } while (0)
49
/*
 * Terminate the program if cond evaluates to false.
 *
 * On failure the calling function's name and the line number are written to
 * stderr and the process exits with status 1.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * (safe in an unbraced if/else); invoke it with a trailing semicolon.
 */
#define CHECK_CONDITION(cond)                                                  \
    do {                                                                       \
        if (!(cond)) {                                                         \
            fprintf(stderr, "Unexpected condition: %s:%d\n", __func__, __LINE__); \
            exit(1);                                                           \
        }                                                                      \
    } while (0)
56
57 #include "loadsurface.h"
58
/* Reference-priority levels, lowest (NONE) to highest (HIGH). */
#define NAL_REF_IDC_NONE        0
#define NAL_REF_IDC_LOW         1
#define NAL_REF_IDC_MEDIUM      2
#define NAL_REF_IDC_HIGH        3
63
/* Frame types as stored in current_frame_type. */
#define FRAME_I                 1
#define FRAME_P                 2
#define FRAME_B                 3
#define FRAME_IDR               7
68
/* HEVC slice type codes. */
enum {
    SLICE_B = 0,
    SLICE_P = 1,
    SLICE_I = 2,
};

/* Predicates testing a slice type code against one of the values above. */
#define IS_I_SLICE(type)        (SLICE_I == (type))
#define IS_P_SLICE(type)        (SLICE_P == (type))
#define IS_B_SLICE(type)        (SLICE_B == (type))
78
79
80
/* Entropy coding mode selectors. */
#define ENTROPY_MODE_CAVLC      0
#define ENTROPY_MODE_CABAC      1

/* Profile identifiers (profile_idc values) for Main and Main 10. */
#define PROFILE_IDC_MAIN        1
#define PROFILE_IDC_MAIN10      2
86
/* Bitstream buffers start at, and grow by, this many 32-bit words. */
#define BITSTREAM_ALLOCATE_STEPPING     4096

/* Coding block size in luma samples used when no explicit block sizes are
 * configured (its log2 is taken when the SPS is filled). */
static int LCU_SIZE = 32;
89
/* Size of each surface/buffer pool: the source-YUV surfaces, the reference
 * surfaces and the coded buffers are all arrays of this many entries. */
#define SURFACE_NUM 16
/*
 * HEVC NAL unit type codes (the 6-bit nal_unit_type field).
 * 0x00..0x15 are coded slice segments (VCL); 0x20..0x28 are non-VCL units.
 */
enum NALUType {
    /* Trailing pictures (non-TSA, non-STSA) */
    NALU_TRAIL_N        = 0x00,
    NALU_TRAIL_R        = 0x01,
    /* TSA pictures */
    NALU_TSA_N          = 0x02,
    NALU_TSA_R          = 0x03,
    /* STSA pictures */
    NALU_STSA_N         = 0x04,
    NALU_STSA_R         = 0x05,
    /* RADL pictures */
    NALU_RADL_N         = 0x06,
    NALU_RADL_R         = 0x07,
    /* RASL pictures */
    NALU_RASL_N         = 0x08,
    NALU_RASL_R         = 0x09,
    /* 0x0a..0x0f - Reserved */

    /* BLA pictures */
    NALU_BLA_W_LP       = 0x10,
    NALU_BLA_W_DLP      = 0x11,
    NALU_BLA_N_LP       = 0x12,
    /* IDR pictures */
    NALU_IDR_W_DLP      = 0x13,
    NALU_IDR_N_LP       = 0x14,
    /* CRA picture */
    NALU_CRA            = 0x15,
    /* 0x16..0x1f - Reserved */

    NALU_VPS            = 0x20, /* Video parameter set */
    NALU_SPS            = 0x21, /* Sequence parameter set */
    NALU_PPS            = 0x22, /* Picture parameter set */
    NALU_AUD            = 0x23, /* Access unit delimiter */
    NALU_EOS            = 0x24, /* End of sequence */
    NALU_EOB            = 0x25, /* End of bitstream */
    NALU_FD             = 0x26, /* Filler data */
    NALU_PREFIX_SEI     = 0x27, /* Supplemental enhancement information (prefix) */
    NALU_SUFFIX_SEI     = 0x28, /* Supplemental enhancement information (suffix) */
    /* 0x29..0x2f - Reserved */
    /* 0x30..0x3f - Unspecified */

    /* Must stay the last enumerator; change this value if the NAL unit
     * type range grows. */
    MAX_HEVC_NAL_TYPE   = 0x3f,
};
127
/* Array dimensions used by the parameter-set structures below. */
#define MAX_TEMPORAL_SUBLAYERS          8
#define MAX_LAYER_ID                    64
#define MAX_LONGTERM_REF_PIC            32
#define NUM_OF_EXTRA_SLICEHEADER_BITS   3
/*
 * General profile/tier/level fields.
 * The trailing comment on each member is its bitstream descriptor:
 * u(n) is an n-bit unsigned field.
 */
struct ProfileTierParamSet {
    uint8_t     general_profile_space;                      /* u(2) */
    int         general_tier_flag;                          /* u(1) */
    uint8_t     general_profile_idc;                        /* u(5) */
    int         general_profile_compatibility_flag[32];     /* u(1) each */
    int         general_progressive_source_flag;            /* u(1) */
    int         general_interlaced_source_flag;             /* u(1) */
    int         general_non_packed_constraint_flag;         /* u(1) */
    int         general_frame_only_constraint_flag;         /* u(1) */
    int         general_reserved_zero_43bits[43];           /* u(1) each */
    int         general_reserved_zero_bit;                  /* u(1) */
    uint8_t     general_level_idc;                          /* u(8) */
};
146 // Video parameter set structure
147 struct VideoParamSet {
148 uint8_t vps_video_parameter_set_id; //u(4)
149 int vps_base_layer_internal_flag; //u(1)
150 int vps_base_layer_available_flag; //u(1)
151 uint8_t vps_max_layers_minus1; //u(6)
152 uint8_t vps_max_sub_layers_minus1; //u(3)
153 int vps_temporal_id_nesting_flag; //u(1)
154 uint16_t vps_reserved_0xffff_16bits; //u(16)
155
156 struct ProfileTierParamSet ptps;
157 uint8_t vps_max_nuh_reserved_zero_layer_id;
158 uint32_t vps_max_op_sets;
159 uint32_t vps_num_op_sets_minus1;
160
161 int vps_sub_layer_ordering_info_present_flag; //u(1)
162 uint32_t vps_max_dec_pic_buffering_minus1[MAX_TEMPORAL_SUBLAYERS]; //ue(v)
163 uint32_t vps_max_num_reorder_pics[MAX_TEMPORAL_SUBLAYERS]; //ue(v)
164 uint32_t vps_max_latency_increase_plus1[MAX_TEMPORAL_SUBLAYERS]; //ue(v)
165 uint8_t vps_max_layer_id; //u(6)
166 uint32_t vps_num_layer_sets_minus1; //ue(v)
167 int layer_id_included_flag[MAX_TEMPORAL_SUBLAYERS][MAX_LAYER_ID]; //u(1)
168 int vps_timing_info_present_flag; //u(1)
169 uint32_t vps_num_units_in_tick; //u(32)
170 uint32_t vps_time_scale; //u(32
171 int vps_poc_proportional_to_timing_flag; //u(1)
172 uint32_t vps_num_ticks_poc_diff_one_minus1; //ue(v)
173 uint32_t vps_num_hrd_parameters; //ue(v)
174 uint32_t hrd_layer_set_idx[MAX_TEMPORAL_SUBLAYERS]; //ue(v)
175 int cprms_present_flag[MAX_TEMPORAL_SUBLAYERS]; //u(1)
176 int vps_extension_flag; //u(1)
177 int vps_extension_data_flag; //u(1)
178 };
179
/*
 * One short-term reference picture set.
 * Descriptors: u(n) = n-bit unsigned, ue(v) = unsigned Exp-Golomb.
 */
struct ShortTermRefPicParamSet {
    /* Inter-RPS prediction fields. */
    int         inter_ref_pic_set_prediction_flag;  /* u(1) */
    uint32_t    delta_idx_minus1;                   /* ue(v) */
    uint8_t     delta_rps_sign;                     /* u(1) */
    uint32_t    abs_delta_rps_minus1;               /* ue(v) */
    uint8_t     used_by_curr_pic_flag[32];          /* u(1) each */
    uint8_t     use_delta_flag[32];                 /* u(1) each */

    /* Explicitly listed pictures: s0 entries are the "negative" pictures,
     * s1 entries the "positive" ones. */
    uint32_t    num_negative_pics;                  /* ue(v) */
    uint32_t    num_positive_pics;                  /* ue(v) */
    uint32_t    delta_poc_s0_minus1[32];            /* ue(v) each */
    uint8_t     used_by_curr_pic_s0_flag[32];       /* u(1) each */
    uint32_t    delta_poc_s1_minus1[32];            /* ue(v) each */
    uint8_t     used_by_curr_pic_s1_flag[32];       /* u(1) each */
};
194 struct SeqParamSet {
195 uint8_t sps_video_parameter_set_id; //u(4)
196 uint8_t sps_max_sub_layers_minus1; //u(3)
197 int sps_temporal_id_nesting_flag; //u(1)
198
199 struct ProfileTierParamSet ptps;
200 uint32_t sps_seq_parameter_set_id; //ue(v)
201 uint32_t chroma_format_idc; //ue(v)
202 int separate_colour_plane_flag; //u(1)
203 uint32_t pic_width_in_luma_samples; //ue(v)
204 uint32_t pic_height_in_luma_samples; //ue(v)
205 int conformance_window_flag; //u(1)
206 uint32_t conf_win_left_offset; //ue(v)
207 uint32_t conf_win_right_offset; //ue(v)
208 uint32_t conf_win_top_offset; //ue(v)
209 uint32_t conf_win_bottom_offset; //ue(v)
210 uint32_t bit_depth_luma_minus8; //ue(v)
211 uint32_t bit_depth_chroma_minus8; //ue(v)
212 uint32_t log2_max_pic_order_cnt_lsb_minus4; //ue(v)
213 int sps_sub_layer_ordering_info_present_flag; //u(1)
214 uint32_t sps_max_dec_pic_buffering_minus1[MAX_TEMPORAL_SUBLAYERS]; //ue(v)
215 uint32_t sps_max_num_reorder_pics[MAX_TEMPORAL_SUBLAYERS]; //ue(v)
216 uint32_t sps_max_latency_increase_plus1[MAX_TEMPORAL_SUBLAYERS]; //ue(v)
217 uint32_t log2_min_luma_coding_block_size_minus3; //ue(v)
218 uint32_t log2_diff_max_min_luma_coding_block_size;
219 uint32_t log2_max_coding_block_size_minus3; //ue(v)
220 uint32_t log2_min_luma_transform_block_size_minus2; //ue(v)
221 uint32_t log2_diff_max_min_luma_transform_block_size; //ue(v)
222 uint32_t max_transform_hierarchy_depth_inter; //ue(v)
223 uint32_t max_transform_hierarchy_depth_intra; //ue(v)
224 uint8_t scaling_list_enabled_flag; //u(1)
225 uint8_t sps_scaling_list_data_present_flag; //u(1)
226 uint8_t amp_enabled_flag; //u(1)
227 uint8_t sample_adaptive_offset_enabled_flag; //u(1)
228 uint8_t pcm_enabled_flag; //u(1)
229 uint8_t pcm_sample_bit_depth_luma_minus1; //u(4)
230 uint8_t pcm_sample_bit_depth_chroma_minus1; //u(4)
231 uint32_t log2_min_pcm_luma_coding_block_size_minus3;
232 uint32_t log2_max_pcm_luma_coding_block_size_minus3; //ue(v)
233 uint32_t log2_diff_max_min_pcm_luma_coding_block_size; //ue(v)
234 uint8_t pcm_loop_filter_disabled_flag; //u(1)
235 uint32_t num_short_term_ref_pic_sets; //ue(v)
236
237 struct ShortTermRefPicParamSet strp[66];
238 uint8_t long_term_ref_pics_present_flag; //u(1)
239 uint32_t num_long_term_ref_pics_sps; //ue(v)
240 uint32_t lt_ref_pic_poc_lsb_sps[MAX_LONGTERM_REF_PIC]; //u(v)
241 uint8_t used_by_curr_pic_lt_sps_flag[MAX_LONGTERM_REF_PIC]; //u(1)
242 uint8_t sps_temporal_mvp_enabled_flag; //u(1)
243 uint8_t strong_intra_smoothing_enabled_flag; //u(1)
244 uint8_t vui_parameters_present_flag; //u(1)
245 //VuiParameters vui_parameters;
246 int sps_extension_present_flag; //u(1)
247 int sps_range_extension_flag; //u(1)
248 int sps_multilayer_extension_flag; //u(1)
249 int sps_3d_extension_flag; //u(1)
250 uint8_t sps_extension_5bits; //u(5)
251 int sps_extension_data_flag; //u(1)
252 };
/*
 * Picture parameter set (PPS) fields.
 * Descriptors: u(n) = n-bit unsigned, ue(v) / se(v) = unsigned / signed
 * Exp-Golomb.
 */
struct PicParamSet {
    uint32_t    pps_pic_parameter_set_id;                   /* ue(v) */
    uint32_t    pps_seq_parameter_set_id;                   /* ue(v) */
    int         dependent_slice_segments_enabled_flag;      /* u(1) */
    int         output_flag_present_flag;                   /* u(1) */
    uint8_t     num_extra_slice_header_bits;                /* u(3) */
    int         sign_data_hiding_enabled_flag;              /* u(1) */
    int         cabac_init_present_flag;                    /* u(1) */
    uint32_t    num_ref_idx_l0_default_active_minus1;       /* ue(v) */
    uint32_t    num_ref_idx_l1_default_active_minus1;       /* ue(v) */

    /* Quantization. */
    int32_t     init_qp_minus26;                            /* se(v) */
    int         constrained_intra_pred_flag;                /* u(1) */
    int         transform_skip_enabled_flag;                /* u(1) */
    int         cu_qp_delta_enabled_flag;                   /* u(1) */
    uint32_t    diff_cu_qp_delta_depth;                     /* ue(v) */
    /* NOTE(review): the two offsets below carry the se(v) descriptor but are
     * stored as uint32_t -- confirm how negative offsets are handled. */
    uint32_t    pps_cb_qp_offset;                           /* se(v) */
    uint32_t    pps_cr_qp_offset;                           /* se(v) */
    int         pps_slice_chroma_qp_offsets_present_flag;   /* u(1) */

    int         weighted_pred_flag;                         /* u(1) */
    int         weighted_bipred_flag;                       /* u(1) */
    int         transquant_bypass_enabled_flag;             /* u(1) */

    /* Tiles and wavefront. */
    int         tiles_enabled_flag;                         /* u(1) */
    int         entropy_coding_sync_enabled_flag;           /* u(1) */
    uint32_t    num_tile_columns_minus1;                    /* ue(v) */
    uint32_t    num_tile_rows_minus1;                       /* ue(v) */
    int         uniform_spacing_flag;                       /* u(1) */
    uint32_t   *column_width_minus1;                        /* ue(v) */
    uint32_t   *row_height_minus1;                          /* ue(v) */
    int         loop_filter_across_tiles_enabled_flag;      /* u(1) */

    /* Loop filtering. */
    int         pps_loop_filter_across_slices_enabled_flag; /* u(1) */
    int         deblocking_filter_control_present_flag;     /* u(1) */
    int         deblocking_filter_override_enabled_flag;    /* u(1) */
    int         pps_deblocking_filter_disabled_flag;        /* u(1) */
    int32_t     pps_beta_offset_div2;                       /* se(v) */
    int32_t     pps_tc_offset_div2;                         /* se(v) */

    int         pps_scaling_list_data_present_flag;         /* u(1) */
    int         lists_modification_present_flag;            /* u(1) */
    uint32_t    log2_parallel_merge_level_minus2;           /* ue(v) */
    int         slice_segment_header_extension_present_flag; /* u(1) */

    /* Extension flags. */
    int         pps_extension_present_flag;                 /* u(1) */
    int         pps_range_extension_flag;                   /* u(1) */
    int         pps_multilayer_extension_flag;              /* u(1) */
    int         pps_3d_extension_flag;                      /* u(1) */
    uint8_t     pps_extension_5bits;                        /* u(5) */
    uint8_t     pps_extension_data_flag;                    /* u(1) */

    /* Range-extension fields. */
    uint32_t    log2_max_transform_skip_block_size_minus2;  /* ue(v) */
    uint8_t     cross_component_prediction_enabled_flag;    /* u(1) */
    uint8_t     chroma_qp_offset_list_enabled_flag;         /* u(1) */
    uint32_t    diff_cu_chroma_qp_offset_depth;             /* ue(v) */
    uint32_t    chroma_qp_offset_list_len_minus1;           /* ue(v) */
    uint32_t    cb_qp_offset_list[6];                       /* se(v) */
    uint32_t    cr_qp_offset_list[6];                       /* se(v) */
    uint32_t    log2_sao_offset_scale_luma;                 /* ue(v) */
    uint32_t    log2_sao_offset_scale_chroma;               /* ue(v) */
};
308 struct SliceHeader {
309 int first_slice_segment_in_pic_flag; //u(1)
310 int no_output_of_prior_pics_flag; //u(1)
311 uint32_t slice_pic_parameter_set_id; //ue(v)
312 int dependent_slice_segment_flag; //u(1)
313 uint32_t picture_width_in_ctus;
314 uint32_t picture_height_in_ctus;
315 uint32_t slice_segment_address; //u(v)
316 int slice_reserved_undetermined_flag[NUM_OF_EXTRA_SLICEHEADER_BITS]; //u(1)
317 uint32_t slice_type; //ue(v)
318 int pic_output_flag; //u(1)
319 uint8_t colour_plane_id; //u(2)
320 uint32_t pic_order_cnt_lsb;
321 uint32_t num_negative_pics;
322 uint32_t num_positive_pics;
323 uint32_t delta_poc_s0_minus1;
324
325 struct ShortTermRefPicParamSet strp;
326 int short_term_ref_pic_set_sps_flag; //u(1)
327 uint32_t short_term_ref_pic_set_idx; //u(v)
328 uint32_t num_long_term_sps; //ue(v)
329 uint32_t num_long_term_pics; //ue(v)
330 uint32_t *lt_idx_sps; //u(v)
331 uint32_t *poc_lsb_lt; //u(v)
332 int *used_by_curr_pic_lt_flag; //u(1)
333 int *delta_poc_msb_present_flag; //u(1)
334 uint32_t *delta_poc_msb_cycle_lt; //ue(v)
335 int slice_temporal_mvp_enabled_flag; //u(1)
336 int slice_sao_luma_flag; //u(1)
337 int slice_sao_chroma_flag; //u(1)
338 int num_ref_idx_active_override_flag; //u(1)
339 uint32_t num_ref_idx_l0_active_minus1; //ue(v)
340 uint32_t num_ref_idx_l1_active_minus1;
341 uint32_t num_poc_total_cur;
342 int ref_pic_list_modification_flag_l0;
343 int ref_pic_list_modification_flag_l1;
344 uint32_t* list_entry_l0;
345 uint32_t* list_entry_l1;
346
347 int ref_pic_list_combination_flag;
348
349 uint32_t num_ref_idx_lc_active_minus1;
350 uint32_t ref_pic_list_modification_flag_lc;
351 int pic_from_list_0_flag;
352 uint32_t ref_idx_list_curr;
353 int mvd_l1_zero_flag; //u(1)
354 int cabac_init_present_flag;
355 int pic_temporal_mvp_enable_flag;
356
357 int collocated_from_l0_flag; //u(1)
358 uint32_t collocated_ref_idx; //ue(v)
359 uint32_t five_minus_max_num_merge_cand; //ue(v)
360 int32_t delta_pic_order_cnt_bottom; //se(v)
361 int32_t slice_qp_delta; //se(v)
362 int32_t slice_qp_delta_cb; //se(v)
363 int32_t slice_qp_delta_cr; //se(v)
364 int cu_chroma_qp_offset_enabled_flag; //u(1)
365 int deblocking_filter_override_flag; //u(1)
366 int disable_deblocking_filter_flag; //u(1)
367 int32_t beta_offset_div2; //se(v)
368 int32_t tc_offset_div2; //se(v)
369 int slice_loop_filter_across_slices_enabled_flag; //u(1)
370 uint32_t num_entry_point_offsets; //ue(v)
371 uint32_t offset_len_minus1; //ue(v)
372 uint32_t *entry_point_offset; //u(v)
373 uint32_t slice_segment_header_extension_length; //ue(v)
374 uint8_t *slice_segment_header_extension_data_byte; //u(8)
375 };
376
/*
 * Block-size limits. When use_block_sizes is set, the SPS block-size fields
 * are taken from a value of this type instead of the built-in defaults.
 */
struct BlockSizes {
    /* Coding tree / coding block sizes (log2, minus 3). */
    uint32_t    log2_max_coding_tree_block_size_minus3;
    uint32_t    log2_min_coding_tree_block_size_minus3;
    uint32_t    log2_min_luma_coding_block_size_minus3;

    /* Luma transform block sizes (log2, minus 2). */
    uint32_t    log2_max_luma_transform_block_size_minus2;
    uint32_t    log2_min_luma_transform_block_size_minus2;

    /* PCM coding block sizes (log2, minus 3). */
    uint32_t    log2_max_pcm_coding_block_size_minus3;
    uint32_t    log2_min_pcm_coding_block_size_minus3;

    /* Bounds on max_transform_hierarchy_depth_{inter,intra}. */
    uint32_t    max_max_transform_hierarchy_depth_inter;
    uint32_t    min_max_transform_hierarchy_depth_inter;
    uint32_t    max_max_transform_hierarchy_depth_intra;
    uint32_t    min_max_transform_hierarchy_depth_intra;
};
390
/*
 * Coding-tool switches. When use_features is set, the corresponding
 * parameter-set flags are taken from a value of this type instead of the
 * built-in defaults. Where noted, the comment names the flag a member feeds.
 */
struct Features {
    uint32_t    amp;                        /* sps->amp_enabled_flag */
    uint32_t    constrained_intra_pred;
    uint32_t    cu_qp_delta;                /* pps->cu_qp_delta_enabled_flag */
    uint32_t    deblocking_filter_disable;
    uint32_t    dependent_slices;
    uint32_t    pcm;                        /* sps->pcm_enabled_flag */
    uint32_t    sao;                        /* sps->sample_adaptive_offset_enabled_flag */
    uint32_t    scaling_lists;
    uint32_t    separate_colour_planes;
    uint32_t    sign_data_hiding;
    uint32_t    strong_intra_smoothing;
    uint32_t    temporal_mvp;               /* sps->sps_temporal_mvp_enabled_flag */
    uint32_t    transform_skip;             /* pps->transform_skip_enabled_flag */
    uint32_t    transquant_bypass;
    uint32_t    weighted_prediction;
};
408
409 static struct VideoParamSet vps;
410 static struct SeqParamSet sps;
411 static struct PicParamSet pps;
412 static struct SliceHeader ssh;
413 static struct BlockSizes block_sizes;
414 static int use_block_sizes = 0;
415 static struct Features features;
416 static int use_features = 0;
417 static VADisplay va_dpy;
418 static VAProfile hevc_profile = ~0;
419 static int real_hevc_profile = 0;
420 static VAEntrypoint entryPoint = VAEntrypointEncSlice;
421 static int p2b = 1;
422 static int lowpower = 0;
423 static VAConfigAttrib attrib[VAConfigAttribTypeMax];
424 static VAConfigAttrib config_attrib[VAConfigAttribTypeMax];
425 static int config_attrib_num = 0, enc_packed_header_idx;
426 static VASurfaceID src_surface[SURFACE_NUM];
427 static VABufferID coded_buf[SURFACE_NUM];
428 static VASurfaceID ref_surface[SURFACE_NUM];
429 static VAConfigID config_id;
430 static VAContextID context_id;
431 static struct ProfileTierParamSet protier_param;
432
433 static VAEncSequenceParameterBufferHEVC seq_param;
434 static VAEncPictureParameterBufferHEVC pic_param;
435 static VAEncSliceParameterBufferHEVC slice_param;
436 static VAPictureHEVC CurrentCurrPic;
437 static VAPictureHEVC ReferenceFrames[16], RefPicList0_P[32], RefPicList0_B[32], RefPicList1_B[32];
438
/* Modulus of the picture-order-count LSB; calc_poc() detects wrap-around
 * against half of this value. */
static unsigned int MaxPicOrderCntLsb = (2 << 8);

/* Reference-frame configuration defaults. */
static unsigned int num_ref_frames = 2;
static unsigned int num_active_ref_p = 1;   /* becomes num_negative_pics of each short-term RPS */
static unsigned int numShortTerm = 0;
static int constraint_set_flag = 0;
static int hevc_packedheader = 0;
static int hevc_maxref = 16;
447
448 static char *coded_fn = NULL, *srcyuv_fn = NULL, *recyuv_fn = NULL;
449 static FILE *coded_fp = NULL, *srcyuv_fp = NULL, *recyuv_fp = NULL;
450 static unsigned long long srcyuv_frames = 0;
451 static int srcyuv_fourcc = VA_FOURCC_NV12;
452 static int calc_psnr = 0;
453
454 static int frame_width = 176;
455 static int frame_height = 144;
456 static int frame_width_aligned;
457 static int frame_height_aligned;
458 static int frame_rate = 30;
459 static unsigned int frame_count = 60;
460 static unsigned int frame_coded = 0;
461 static unsigned int frame_bitrate = 0;
462 static unsigned int frame_slices = 1;
463 static double frame_size = 0;
464 static int initial_qp = 26;
465 static int minimal_qp = 0;
466 static int intra_period = 30;
467 static int intra_idr_period = 60;
468 static int ip_period = 1;
469 static int rc_mode = -1;
470 static int rc_default_modes[] = {
471 VA_RC_VBR,
472 VA_RC_CQP,
473 VA_RC_VBR_CONSTRAINED,
474 VA_RC_CBR,
475 VA_RC_VCM,
476 VA_RC_NONE,
477 };
478 static unsigned long long current_frame_encoding = 0;
479 static unsigned long long current_frame_display = 0;
480 static unsigned long long current_IDR_display = 0;
481 static unsigned int current_frame_num = 0;
482 static int current_frame_type;
483 #define current_slot (current_frame_display % SURFACE_NUM)
484
static int misc_priv_type = 0;
static int misc_priv_value = 0;

/* Smaller / larger of two values. Each argument may be evaluated twice, so
 * do not pass expressions with side effects. */
#define MIN(a, b) ((a) > (b) ? (b) : (a))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
490
491 /* thread to save coded data/upload source YUV */
492 struct storage_task_t {
493 void *next;
494 unsigned long long display_order;
495 unsigned long long encode_order;
496 };
497 static struct storage_task_t *storage_task_header = NULL, *storage_task_tail = NULL;
498 #define SRC_SURFACE_IN_ENCODING 0
499 #define SRC_SURFACE_IN_STORAGE 1
500 static int srcsurface_status[SURFACE_NUM];
501 static int encode_syncmode = 0;
502 static pthread_mutex_t encode_mutex = PTHREAD_MUTEX_INITIALIZER;
503 static pthread_cond_t encode_cond = PTHREAD_COND_INITIALIZER;
504 static pthread_t encode_thread;
505
/* Tick counters for performance profiling, one per pipeline stage plus the
 * overall total. */
static unsigned int UploadPictureTicks  = 0;
static unsigned int BeginPictureTicks   = 0;
static unsigned int RenderPictureTicks  = 0;
static unsigned int EndPictureTicks     = 0;
static unsigned int SyncPictureTicks    = 0;
static unsigned int SavePictureTicks    = 0;
static unsigned int TotalTicks          = 0;
514
/*
 * Growable bit writer.
 *   buffer            - storage, as an array of 32-bit words
 *   bit_offset        - number of bits written so far
 *   max_size_in_dword - capacity of buffer, in 32-bit words
 */
struct __bitstream {
    unsigned int   *buffer;
    int             bit_offset;
    int             max_size_in_dword;
};
typedef struct __bitstream bitstream;
521
/*
 * Reassemble the four bytes of val, taken in memory order, into a 32-bit
 * value with the first byte most significant. On a little-endian host this
 * reverses the byte order; on a big-endian host it returns val unchanged.
 */
static unsigned int
va_swap32(unsigned int val)
{
    const unsigned char *pval = (const unsigned char *)&val;

    /* Widen each byte to unsigned before shifting: an unsigned char promotes
     * to signed int, and shifting a byte >= 0x80 left by 24 would overflow
     * it (undefined behaviour). */
    return (((unsigned int)pval[0] << 24) |
            ((unsigned int)pval[1] << 16) |
            ((unsigned int)pval[2] << 8)  |
            ((unsigned int)pval[3] << 0));
}
532
533 static void
bitstream_start(bitstream * bs)534 bitstream_start(bitstream *bs)
535 {
536 bs->max_size_in_dword = BITSTREAM_ALLOCATE_STEPPING;
537 bs->buffer = calloc(bs->max_size_in_dword * sizeof(int), 1);
538 assert(bs->buffer);
539 bs->bit_offset = 0;
540 }
541
542 static void
bitstream_end(bitstream * bs)543 bitstream_end(bitstream *bs)
544 {
545 int pos = (bs->bit_offset >> 5);
546 int bit_offset = (bs->bit_offset & 0x1f);
547 int bit_left = 32 - bit_offset;
548
549 if (bit_offset) {
550 bs->buffer[pos] = va_swap32((bs->buffer[pos] << bit_left));
551 }
552 }
553
554 static void
put_ui(bitstream * bs,unsigned int val,int size_in_bits)555 put_ui(bitstream *bs, unsigned int val, int size_in_bits)
556 {
557 int pos = (bs->bit_offset >> 5);
558 int bit_offset = (bs->bit_offset & 0x1f);
559 int bit_left = 32 - bit_offset;
560
561 if (!size_in_bits)
562 return;
563
564 bs->bit_offset += size_in_bits;
565
566 if (bit_left > size_in_bits) {
567 bs->buffer[pos] = (bs->buffer[pos] << size_in_bits | val);
568 } else {
569 size_in_bits -= bit_left;
570 bs->buffer[pos] = (bs->buffer[pos] << bit_left) | (val >> size_in_bits);
571 bs->buffer[pos] = va_swap32(bs->buffer[pos]);
572
573 if (pos + 1 == bs->max_size_in_dword) {
574 bs->max_size_in_dword += BITSTREAM_ALLOCATE_STEPPING;
575 bs->buffer = realloc(bs->buffer, bs->max_size_in_dword * sizeof(unsigned int));
576 assert(bs->buffer);
577 }
578
579 bs->buffer[pos + 1] = val;
580 }
581 }
582
583 static void
put_ue(bitstream * bs,unsigned int val)584 put_ue(bitstream *bs, unsigned int val)
585 {
586 int size_in_bits = 0;
587 int tmp_val = ++val;
588
589 while (tmp_val) {
590 tmp_val >>= 1;
591 size_in_bits++;
592 }
593
594 put_ui(bs, 0, size_in_bits - 1); // leading zero
595 put_ui(bs, val, size_in_bits);
596 }
597
598 static void
put_se(bitstream * bs,int val)599 put_se(bitstream *bs, int val)
600 {
601 unsigned int new_val;
602
603 if (val <= 0)
604 new_val = -2 * val;
605 else
606 new_val = 2 * val - 1;
607
608 put_ue(bs, new_val);
609 }
610
611 static void
byte_aligning(bitstream * bs,int bit)612 byte_aligning(bitstream *bs, int bit)
613 {
614 int bit_offset = (bs->bit_offset & 0x7);
615 int bit_left = 8 - bit_offset;
616 int new_val;
617
618 if (!bit_offset)
619 return;
620
621 assert(bit == 0 || bit == 1);
622
623 if (bit)
624 new_val = (1 << bit_left) - 1;
625 else
626 new_val = 0;
627
628 put_ui(bs, new_val, bit_left);
629 }
630
631 static void
rbsp_trailing_bits(bitstream * bs)632 rbsp_trailing_bits(bitstream *bs)
633 {
634 put_ui(bs, 1, 1);
635 byte_aligning(bs, 0);
636 }
637
/*
 * Write the start code that precedes a NAL unit: the 4-byte form 0x00000001
 * for VPS, SPS, PPS and AUD units, the 3-byte form 0x000001 for all others.
 */
static void nal_start_code_prefix(bitstream *bs, int nal_unit_type)
{
    switch (nal_unit_type) {
    case NALU_VPS:
    case NALU_SPS:
    case NALU_PPS:
    case NALU_AUD:
        put_ui(bs, 0x00000001, 32);
        break;

    default:
        put_ui(bs, 0x000001, 24);
        break;
    }
}
648
/*
 * Write the 16-bit NAL unit header for nal_unit_type. The layer id field is
 * always 0 and the temporal-id-plus-1 field is always 1.
 */
static void nal_header(bitstream *bs, int nal_unit_type)
{
    put_ui(bs, 0, 1);               /* forbidden_zero_bit */
    put_ui(bs, nal_unit_type, 6);   /* nal_unit_type */
    put_ui(bs, 0, 6);               /* nuh_layer_id */
    put_ui(bs, 1, 3);               /* nuh_temporal_id_plus1 */
}
656
calc_poc(int pic_order_cnt_lsb)657 static int calc_poc(int pic_order_cnt_lsb)
658 {
659 static int picOrderCntMsb_ref = 0, pic_order_cnt_lsb_ref = 0;
660 int prevPicOrderCntMsb, prevPicOrderCntLsb;
661 int picOrderCntMsb, picOrderCnt;
662
663 if (current_frame_type == FRAME_IDR)
664 prevPicOrderCntMsb = prevPicOrderCntLsb = 0;
665 else {
666 prevPicOrderCntMsb = picOrderCntMsb_ref;
667 prevPicOrderCntLsb = pic_order_cnt_lsb_ref;
668 }
669
670 if ((pic_order_cnt_lsb < prevPicOrderCntLsb) &&
671 ((prevPicOrderCntLsb - pic_order_cnt_lsb) >= (int)(MaxPicOrderCntLsb / 2)))
672 picOrderCntMsb = prevPicOrderCntMsb + MaxPicOrderCntLsb;
673 else if ((pic_order_cnt_lsb > prevPicOrderCntLsb) &&
674 ((pic_order_cnt_lsb - prevPicOrderCntLsb) > (int)(MaxPicOrderCntLsb / 2)))
675 picOrderCntMsb = prevPicOrderCntMsb - MaxPicOrderCntLsb;
676 else
677 picOrderCntMsb = prevPicOrderCntMsb;
678
679 picOrderCnt = picOrderCntMsb + pic_order_cnt_lsb;
680
681 if (current_frame_type != FRAME_B) {
682 picOrderCntMsb_ref = picOrderCntMsb;
683 pic_order_cnt_lsb_ref = pic_order_cnt_lsb;
684 }
685
686 return picOrderCnt;
687 }
688
fill_profile_tier_level(uint8_t vps_max_layers_minus1,struct ProfileTierParamSet * ptps,uint8_t profilePresentFlag)689 static void fill_profile_tier_level(
690 uint8_t vps_max_layers_minus1,
691 struct ProfileTierParamSet *ptps,
692 uint8_t profilePresentFlag)
693 {
694 if (!profilePresentFlag)
695 return;
696
697 memset(ptps, 0, sizeof(*ptps));
698
699 ptps->general_profile_space = 0;
700 ptps->general_tier_flag = 0;
701 ptps->general_profile_idc = real_hevc_profile;
702 memset(ptps->general_profile_compatibility_flag, 0, 32 * sizeof(int));
703 ptps->general_profile_compatibility_flag[ptps->general_profile_idc] = 1;
704 ptps->general_progressive_source_flag = 1;
705 ptps->general_interlaced_source_flag = 0;
706 ptps->general_non_packed_constraint_flag = 0;
707 ptps->general_frame_only_constraint_flag = 1;
708
709 ptps->general_level_idc = 30;
710 ptps->general_level_idc = ptps->general_level_idc * 4;
711
712 }
fill_vps_header(struct VideoParamSet * vps)713 static void fill_vps_header(struct VideoParamSet *vps)
714 {
715 int i = 0;
716 memset(vps, 0, sizeof(*vps));
717
718 vps->vps_video_parameter_set_id = 0;
719 vps->vps_base_layer_internal_flag = 1;
720 vps->vps_base_layer_available_flag = 1;
721 vps->vps_max_layers_minus1 = 0;
722 vps->vps_max_sub_layers_minus1 = 0; // max temporal layer minus 1
723 vps->vps_temporal_id_nesting_flag = 1;
724 vps->vps_reserved_0xffff_16bits = 0xFFFF;
725 // hevc::ProfileTierParamSet ptps;
726 memset(&vps->ptps, 0, sizeof(vps->ptps));
727 fill_profile_tier_level(vps->vps_max_layers_minus1, &protier_param, 1);
728 vps->vps_sub_layer_ordering_info_present_flag = 0;
729 for (i = 0; i < MAX_TEMPORAL_SUBLAYERS; i++) {
730 vps->vps_max_dec_pic_buffering_minus1[i] = intra_period == 1 ? 1 : 6;
731 vps->vps_max_num_reorder_pics[i] = ip_period != 0 ? ip_period - 1 : 0;
732 vps->vps_max_latency_increase_plus1[i] = 0;
733 }
734 vps->vps_max_layer_id = 0;
735 vps->vps_num_layer_sets_minus1 = 0;
736 vps->vps_sub_layer_ordering_info_present_flag = 0;
737 vps->vps_max_nuh_reserved_zero_layer_id = 0;
738 vps->vps_max_op_sets = 1;
739 vps->vps_timing_info_present_flag = 0;
740 vps->vps_extension_flag = 0;
741 }
742
fill_short_term_ref_pic_header(struct ShortTermRefPicParamSet * strp,uint8_t strp_index)743 static void fill_short_term_ref_pic_header(
744 struct ShortTermRefPicParamSet *strp,
745 uint8_t strp_index)
746 {
747 uint32_t i = 0;
748 // inter_ref_pic_set_prediction_flag is always 0 now
749 strp->inter_ref_pic_set_prediction_flag = 0;
750 /* don't need to set below parameters since inter_ref_pic_set_prediction_flag equal to 0
751 strp->delta_idx_minus1 should be set to 0 since strp_index != num_short_term_ref_pic_sets in sps
752 strp->delta_rps_sign;
753 strp->abs_delta_rps_minus1;
754 strp->used_by_curr_pic_flag[j];
755 strp->use_delta_flag[j];
756 */
757 strp->num_negative_pics = num_active_ref_p;
758 int num_positive_pics = ip_period > 1 ? 1 : 0;
759 strp->num_positive_pics = strp_index == 0 ? 0 : num_positive_pics;
760
761 if (strp_index == 0) {
762 for (i = 0; i < strp->num_negative_pics; i++) {
763 strp->delta_poc_s0_minus1[i] = ip_period - 1;
764 strp->used_by_curr_pic_s0_flag[i] = 1;
765 }
766 } else {
767 for (i = 0; i < strp->num_negative_pics; i++) {
768 strp->delta_poc_s0_minus1[i] = (i == 0) ?
769 (strp_index - 1) : (ip_period - 1);
770 strp->used_by_curr_pic_s0_flag[i] = 1;
771 }
772 for (i = 0; i < strp->num_positive_pics; i++) {
773 strp->delta_poc_s1_minus1[i] = ip_period - 1 - strp_index;
774 strp->used_by_curr_pic_s1_flag[i] = 1;
775 }
776
777 }
778 }
779
fill_sps_header(struct SeqParamSet * sps,int id)780 void fill_sps_header(struct SeqParamSet *sps, int id)
781 {
782 int i = 0;
783 memset(sps, 0, sizeof(struct SeqParamSet));
784
785 sps->sps_video_parameter_set_id = 0;
786 sps->sps_max_sub_layers_minus1 = 0;
787 sps->sps_temporal_id_nesting_flag = 1;
788 fill_profile_tier_level(sps->sps_max_sub_layers_minus1, &sps->ptps, 1);
789 sps->sps_seq_parameter_set_id = id;
790 sps->chroma_format_idc = 1;
791 if (sps->chroma_format_idc == 3) {
792 sps->separate_colour_plane_flag = use_features ? features.separate_colour_planes : 0;
793 }
794 frame_width_aligned = ALIGN16(frame_width);
795 frame_height_aligned = ALIGN16(frame_height);
796 sps->pic_width_in_luma_samples = frame_width_aligned;
797 sps->pic_height_in_luma_samples = frame_height_aligned;
798 if (frame_width_aligned != frame_width ||
799 frame_height_aligned != frame_height) {
800 sps->conformance_window_flag = 1;
801 sps->conf_win_left_offset = 0;
802 sps->conf_win_top_offset = 0;
803 switch (sps->chroma_format_idc) {
804 case 0:
805 case 3: // 4:4:4 format
806 sps->conf_win_right_offset = (frame_width_aligned - frame_width);
807 sps->conf_win_bottom_offset = (frame_height_aligned - frame_height);
808 break;
809
810 case 2: // 4:2:2 format
811 sps->conf_win_right_offset = (frame_width_aligned - frame_width) >> 1;
812 sps->conf_win_bottom_offset = (frame_height_aligned - frame_height);
813 break;
814
815 case 1:
816 default: // 4:2:0 format
817 sps->conf_win_right_offset = (frame_width_aligned - frame_width) >> 1;
818 sps->conf_win_bottom_offset = (frame_height_aligned - frame_height) >> 1;
819 break;
820 }
821 } else {
822 sps->conformance_window_flag = 0;
823 }
824
825 sps->bit_depth_luma_minus8 = 0;
826 sps->bit_depth_chroma_minus8 = 0;
827 sps->log2_max_pic_order_cnt_lsb_minus4 = MAX((ceil(log(ip_period - 1 + 4) / log(2.0)) + 3), 4) - 4;
828 sps->sps_sub_layer_ordering_info_present_flag = 0;
829 for (i = 0; i < MAX_TEMPORAL_SUBLAYERS; i++) {
830 sps->sps_max_dec_pic_buffering_minus1[i] = intra_period == 1 ? 1 : 6;
831 sps->sps_max_num_reorder_pics[i] = ip_period != 0 ? ip_period - 1 : 0;
832 sps->sps_max_latency_increase_plus1[i] = 0;
833 }
834 sps->log2_min_luma_coding_block_size_minus3 = use_block_sizes ? block_sizes.log2_min_luma_coding_block_size_minus3 : 0;
835 int log2_max_luma_coding_block_size = use_block_sizes ? block_sizes.log2_max_coding_tree_block_size_minus3 + 3 : log2(LCU_SIZE);
836 int log2_min_luma_coding_block_size = sps->log2_min_luma_coding_block_size_minus3 + 3;
837 sps->log2_diff_max_min_luma_coding_block_size = log2_max_luma_coding_block_size -
838 log2_min_luma_coding_block_size;
839 sps->log2_min_luma_transform_block_size_minus2 = use_block_sizes ? block_sizes.log2_min_luma_transform_block_size_minus2 : 0;
840 sps->log2_diff_max_min_luma_transform_block_size = use_block_sizes ? (block_sizes.log2_max_luma_transform_block_size_minus2 -
841 sps->log2_min_luma_transform_block_size_minus2) : 3;
842 sps->max_transform_hierarchy_depth_inter = use_block_sizes ? block_sizes.max_max_transform_hierarchy_depth_inter : 2;
843 sps->max_transform_hierarchy_depth_intra = use_block_sizes ? block_sizes.max_max_transform_hierarchy_depth_intra : 2;
844 sps->scaling_list_enabled_flag = use_features ? features.scaling_lists : 0;
845 sps->sps_scaling_list_data_present_flag = 0;
846 sps->amp_enabled_flag = use_features ? features.amp : 1;
847 sps->sample_adaptive_offset_enabled_flag = use_features ? features.sao : 1;
848 sps->pcm_enabled_flag = use_features ? features.pcm : 0;
849 /* ignore below parameters seting since pcm_enabled_flag equal to 0
850 pcm_sample_bit_depth_luma_minus1;
851 pcm_sample_bit_depth_chroma_minus1;
852 log2_min_pcm_luma_coding_block_size_minus3;
853 log2_diff_max_min_pcm_luma_coding_block_size;
854 pcm_loop_filter_disabled_flag;
855 */
856 sps->num_short_term_ref_pic_sets = ip_period;
857
858 memset(&sps->strp[0], 0, sizeof(sps->strp));
859 for (i = 0; i < MIN(sps->num_short_term_ref_pic_sets, 64); i++)
860 fill_short_term_ref_pic_header(&sps->strp[i], i);
861 sps->long_term_ref_pics_present_flag = 0;
862 /* ignore below parameters seting since long_term_ref_pics_present_flag equal to 0
863 num_long_term_ref_pics_sps;
864 lt_ref_pic_poc_lsb_sps[kMaxLongTermRefPic];
865 used_by_curr_pic_lt_sps_flag[kMaxLongTermRefPic];
866 */
867 sps->sps_temporal_mvp_enabled_flag = use_features ? features.temporal_mvp : 1;
868 sps->strong_intra_smoothing_enabled_flag = use_features ? features.strong_intra_smoothing : 0;
869
870 sps->vui_parameters_present_flag = 0;
871 sps->sps_extension_present_flag = 0;
872 /* ignore below parameters seting since sps_extension_present_flag equal to 0
873 sps->sps_range_extension_flag
874 sps->sps_multilayer_extension_flag
875 sps->sps_3d_extension_flag
876 sps->sps_extension_5bits
877 sps->sps_extension_data_flag
878 */
879 }
880
fill_pps_header(struct PicParamSet * pps,uint32_t pps_id,uint32_t sps_id)881 static void fill_pps_header(
882 struct PicParamSet *pps,
883 uint32_t pps_id,
884 uint32_t sps_id)
885 {
886 memset(pps, 0, sizeof(struct PicParamSet));
887
888 pps->pps_pic_parameter_set_id = pps_id;
889 pps->pps_seq_parameter_set_id = sps_id;
890 pps->dependent_slice_segments_enabled_flag = use_features ? features.dependent_slices : 0;
891 pps->output_flag_present_flag = 0;
892 pps->num_extra_slice_header_bits = 0;
893 pps->sign_data_hiding_enabled_flag = use_features ? features.sign_data_hiding : 0;
894 pps->cabac_init_present_flag = 1;
895
896 pps->num_ref_idx_l0_default_active_minus1 = 0;
897 pps->num_ref_idx_l1_default_active_minus1 = 0;
898
899 pps->init_qp_minus26 = initial_qp - 26;
900 pps->constrained_intra_pred_flag = use_features ? features.constrained_intra_pred : 0;
901 pps->transform_skip_enabled_flag = use_features ? features.transform_skip : 0;
902 pps->cu_qp_delta_enabled_flag = use_features ? features.cu_qp_delta : 1;
903 if (pps->cu_qp_delta_enabled_flag)
904 pps->diff_cu_qp_delta_depth = 2;
905 pps->pps_cb_qp_offset = 0;
906 pps->pps_cr_qp_offset = 0;
907 pps->pps_slice_chroma_qp_offsets_present_flag = 0;
908 pps->weighted_pred_flag = use_features ? features.weighted_prediction : 0;
909 pps->weighted_bipred_flag = 0;
910 pps->transquant_bypass_enabled_flag = use_features ? features.transquant_bypass : 0;
911 pps->entropy_coding_sync_enabled_flag = 0;
912 pps->tiles_enabled_flag = 0;
913
914 pps->pps_loop_filter_across_slices_enabled_flag = 0;
915 pps->deblocking_filter_control_present_flag = 1;
916 pps->deblocking_filter_override_enabled_flag = 0,
917 pps->pps_deblocking_filter_disabled_flag = use_features ? features.deblocking_filter_disable : 0,
918 pps->pps_beta_offset_div2 = 2,
919 pps->pps_tc_offset_div2 = 0,
920 pps->pps_scaling_list_data_present_flag = 0;
921 pps->lists_modification_present_flag = 0;
922 pps->log2_parallel_merge_level_minus2 = 0;
923 pps->slice_segment_header_extension_present_flag = 0;
924 pps->pps_extension_present_flag = 0;
925 pps->pps_range_extension_flag = 0;
926
927 }
fill_slice_header(uint32_t count,struct PicParamSet * pps,struct SliceHeader * slice)928 static void fill_slice_header(
929 uint32_t count,
930 struct PicParamSet *pps,
931 struct SliceHeader *slice)
932 {
933 memset(slice, 0, sizeof(struct SliceHeader));
934 slice->pic_output_flag = 1;
935 slice->colour_plane_id = 0;
936 slice->no_output_of_prior_pics_flag = 0;
937 slice->pic_order_cnt_lsb = calc_poc((current_frame_display - current_IDR_display) % MaxPicOrderCntLsb);
938
939 //slice_segment_address (u(v))
940 int lcu_size = use_block_sizes ? (1 << (block_sizes.log2_max_coding_tree_block_size_minus3 + 3)) : LCU_SIZE;
941 slice->picture_height_in_ctus = (frame_height + lcu_size - 1) / lcu_size;
942 slice->picture_width_in_ctus = (frame_width + lcu_size - 1) / lcu_size;
943 slice->slice_segment_address = 0;
944 slice->first_slice_segment_in_pic_flag = ((slice->slice_segment_address == 0) ? 1 : 0);
945 slice->slice_type = current_frame_type == FRAME_P ? (p2b ? SLICE_B : SLICE_P) :
946 current_frame_type == FRAME_B ? SLICE_B : SLICE_I;
947
948 slice->dependent_slice_segment_flag = 0;
949 slice->short_term_ref_pic_set_sps_flag = 1;
950 slice->num_ref_idx_active_override_flag = 0;
951 slice->short_term_ref_pic_set_idx = slice->pic_order_cnt_lsb % ip_period;
952 slice->strp.num_negative_pics = numShortTerm;
953 slice->strp.num_positive_pics = 0;
954 slice->slice_sao_luma_flag = 0;
955 slice->slice_sao_chroma_flag = 0;
956 slice->slice_temporal_mvp_enabled_flag = use_features ? features.temporal_mvp : 1;
957
958 slice->num_ref_idx_l0_active_minus1 = pps->num_ref_idx_l0_default_active_minus1;
959 slice->num_ref_idx_l1_active_minus1 = pps->num_ref_idx_l1_default_active_minus1;
960
961 slice->num_poc_total_cur = 0;
962 // for I slice
963 if (current_frame_type == FRAME_I || current_frame_type == FRAME_IDR) {
964 slice->ref_pic_list_modification_flag_l0 = 0;
965 slice->list_entry_l0 = 0;
966 slice->ref_pic_list_modification_flag_l1 = 0;
967 slice->list_entry_l1 = 0;
968 } else {
969 slice->ref_pic_list_modification_flag_l0 = 1;
970 slice->num_poc_total_cur = 2;
971 }
972
973 slice->ref_pic_list_combination_flag = 0;
974 slice->num_ref_idx_lc_active_minus1 = 0;
975 slice->ref_pic_list_modification_flag_lc = 0;
976 slice->pic_from_list_0_flag = 0;
977 slice->ref_idx_list_curr = 0;
978 slice->mvd_l1_zero_flag = 0;
979 slice->cabac_init_present_flag = 0;
980
981 slice->slice_qp_delta = 0;
982 slice->slice_qp_delta_cb = pps->pps_cb_qp_offset;
983 slice->slice_qp_delta_cr = pps->pps_cr_qp_offset;
984
985 slice->deblocking_filter_override_flag = 0;
986 slice->disable_deblocking_filter_flag = 0;
987 slice->tc_offset_div2 = pps->pps_tc_offset_div2;
988 slice->beta_offset_div2 = pps->pps_beta_offset_div2;
989
990 slice->collocated_from_l0_flag = 1;
991 slice->collocated_ref_idx = pps->num_ref_idx_l0_default_active_minus1;
992
993 slice->five_minus_max_num_merge_cand = 0;
994
995 slice->slice_loop_filter_across_slices_enabled_flag = 0;
996 slice->num_entry_point_offsets = 0;
997 slice->offset_len_minus1 = 0;
998 }
999
/*
 * Write the general profile/tier/level fields of the file-level
 * `protier_param` to bs: profile space (2 bits), tier flag (1), profile idc
 * (5), 32 compatibility flags, four source/constraint flags, 44 zero bits
 * and the level idc (8 bits).
 */
static void protier_rbsp(bitstream *bs)
{
    uint32_t flag_idx;

    put_ui(bs, protier_param.general_profile_space, 2);
    put_ui(bs, protier_param.general_tier_flag, 1);
    put_ui(bs, protier_param.general_profile_idc, 5);

    for (flag_idx = 0; flag_idx < 32; flag_idx++) {
        put_ui(bs, protier_param.general_profile_compatibility_flag[flag_idx], 1);
    }

    put_ui(bs, protier_param.general_progressive_source_flag, 1);
    put_ui(bs, protier_param.general_interlaced_source_flag, 1);
    put_ui(bs, protier_param.general_non_packed_constraint_flag, 1);
    put_ui(bs, protier_param.general_frame_only_constraint_flag, 1);

    /* 44 zero bits, emitted as 16 + 16 + 12.
     * NOTE(review): presumably the reserved bits of profile_tier_level();
     * confirm against the H.265 syntax table. */
    put_ui(bs, 0, 16);
    put_ui(bs, 0, 16);
    put_ui(bs, 0, 12);

    put_ui(bs, protier_param.general_level_idc, 8);
}
/*
 * Write one short-term reference picture set to bs.
 *
 * bs:         destination bitstream.
 * strp:       the set to serialise.
 * first_strp: non-zero for the first set of a list; in that case the
 *             inter_ref_pic_set_prediction_flag bit is not written.
 *
 * Only the explicitly coded form is produced (negative/positive picture
 * counts followed by one delta-POC and one "used" flag per picture).
 */
void pack_short_term_ref_pic_setp(
    bitstream *bs,
    struct ShortTermRefPicParamSet* strp,
    int first_strp)
{
    uint32_t pic;

    if (first_strp == 0) {
        put_ui(bs, strp->inter_ref_pic_set_prediction_flag, 1);
    }

    // inter_ref_pic_set_prediction_flag is always 0 now
    put_ue(bs, strp->num_negative_pics);
    put_ue(bs, strp->num_positive_pics);

    /* pictures preceding the current one */
    for (pic = 0; pic < strp->num_negative_pics; pic++) {
        put_ue(bs, strp->delta_poc_s0_minus1[pic]);
        put_ui(bs, strp->used_by_curr_pic_s0_flag[pic], 1);
    }

    /* pictures following the current one */
    for (pic = 0; pic < strp->num_positive_pics; pic++) {
        put_ue(bs, strp->delta_poc_s1_minus1[pic]);
        put_ui(bs, strp->used_by_curr_pic_s1_flag[pic], 1);
    }
}
/*
 * Write the file-level `vps` (video parameter set) to bs, following the
 * field order of the H.265 video_parameter_set_rbsp() syntax (7.3.2.1).
 *
 * NOTE(review): no layer_id_included_flag entries and no hrd_parameters()
 * structures are written, so the output presumably is only well-formed
 * while vps_num_op_sets_minus1 and vps_num_hrd_parameters are 0 - confirm
 * against the code that fills `vps`.
 */
static void vps_rbsp(bitstream *bs)
{
    uint32_t i = 0;

    put_ui(bs, vps.vps_video_parameter_set_id, 4);
    put_ui(bs, 3, 2); //vps_reserved_three_2bits
    put_ui(bs, 0, 6); //vps_reserved_zero_6bits

    put_ui(bs, vps.vps_max_sub_layers_minus1, 3);
    put_ui(bs, vps.vps_temporal_id_nesting_flag, 1);
    put_ui(bs, 0xFFFF, 16); //vps_reserved_0xffff_16bits
    protier_rbsp(bs);

    put_ui(bs, vps.vps_sub_layer_ordering_info_present_flag, 1);

    /* Without per-sub-layer ordering info only the highest sub-layer is written. */
    for (i = (vps.vps_sub_layer_ordering_info_present_flag ? 0 : vps.vps_max_sub_layers_minus1); i <= vps.vps_max_sub_layers_minus1; i++) {
        // NOTE: In teddi and mv_encoder, the setting is max_dec_pic_buffering.
        // here just follow the spec 7.3.2.1
        put_ue(bs, vps.vps_max_dec_pic_buffering_minus1[i]);
        put_ue(bs, vps.vps_max_num_reorder_pics[i]);
        put_ue(bs, vps.vps_max_latency_increase_plus1[i]);
    }

    put_ui(bs, vps.vps_max_nuh_reserved_zero_layer_id, 6);
    put_ue(bs, vps.vps_num_op_sets_minus1);

    put_ui(bs, vps.vps_timing_info_present_flag, 1);

    if (vps.vps_timing_info_present_flag) {
        /* vps_num_units_in_tick and vps_time_scale are fixed-length u(32)
         * fields, not Exp-Golomb codes.  Each is written as two 16-bit
         * halves so that no single put_ui() call exceeds 16 bits. */
        put_ui(bs, ((uint32_t)vps.vps_num_units_in_tick >> 16) & 0xFFFF, 16);
        put_ui(bs, (uint32_t)vps.vps_num_units_in_tick & 0xFFFF, 16);
        put_ui(bs, ((uint32_t)vps.vps_time_scale >> 16) & 0xFFFF, 16);
        put_ui(bs, (uint32_t)vps.vps_time_scale & 0xFFFF, 16);

        /* vps_poc_proportional_to_timing_flag is a one-bit flag, u(1). */
        put_ui(bs, vps.vps_poc_proportional_to_timing_flag, 1);
        if (vps.vps_poc_proportional_to_timing_flag) {
            put_ue(bs, vps.vps_num_ticks_poc_diff_one_minus1);
        }
        put_ue(bs, vps.vps_num_hrd_parameters);
        for (i = 0; i < vps.vps_num_hrd_parameters; i++) {
            put_ue(bs, vps.hrd_layer_set_idx[i]);
            if (i > 0) {
                put_ui(bs, vps.cprms_present_flag[i], 1);
            }
        }
    }

    // no extension flag
    put_ui(bs, 0, 1);
}
1087
/*
 * Write the file-level `sps` (sequence parameter set) to bs, following the
 * field order of the H.265 seq_parameter_set_rbsp() syntax (7.3.2.2).
 *
 * Scaling-list data, VUI parameters and SPS extensions are not serialised;
 * only their presence flags are written.
 */
static void sps_rbsp(bitstream *bs)
{
    uint32_t i = 0;

    put_ui(bs, sps.sps_video_parameter_set_id, 4);
    put_ui(bs, sps.sps_max_sub_layers_minus1, 3);
    put_ui(bs, sps.sps_temporal_id_nesting_flag, 1);

    protier_rbsp(bs);

    put_ue(bs, sps.sps_seq_parameter_set_id);
    put_ue(bs, sps.chroma_format_idc);

    if (sps.chroma_format_idc == 3) {
        put_ui(bs, sps.separate_colour_plane_flag, 1);
    }
    put_ue(bs, sps.pic_width_in_luma_samples);
    put_ue(bs, sps.pic_height_in_luma_samples);

    put_ui(bs, sps.conformance_window_flag, 1);

    if (sps.conformance_window_flag) {
        put_ue(bs, sps.conf_win_left_offset);
        put_ue(bs, sps.conf_win_right_offset);
        put_ue(bs, sps.conf_win_top_offset);
        put_ue(bs, sps.conf_win_bottom_offset);
    }
    put_ue(bs, sps.bit_depth_luma_minus8);
    put_ue(bs, sps.bit_depth_chroma_minus8);
    put_ue(bs, sps.log2_max_pic_order_cnt_lsb_minus4);
    put_ui(bs, sps.sps_sub_layer_ordering_info_present_flag, 1);

    /* Without per-sub-layer ordering info only the highest sub-layer is written. */
    for (i = (sps.sps_sub_layer_ordering_info_present_flag ? 0 : sps.sps_max_sub_layers_minus1); i <= sps.sps_max_sub_layers_minus1; i++) {
        // NOTE: In teddi and mv_encoder, the setting is max_dec_pic_buffering.
        // here just follow the spec 7.3.2.2
        put_ue(bs, sps.sps_max_dec_pic_buffering_minus1[i]);
        put_ue(bs, sps.sps_max_num_reorder_pics[i]);
        put_ue(bs, sps.sps_max_latency_increase_plus1[i]);
    }

    put_ue(bs, sps.log2_min_luma_coding_block_size_minus3);
    put_ue(bs, sps.log2_diff_max_min_luma_coding_block_size);
    put_ue(bs, sps.log2_min_luma_transform_block_size_minus2);
    put_ue(bs, sps.log2_diff_max_min_luma_transform_block_size);
    put_ue(bs, sps.max_transform_hierarchy_depth_inter);
    put_ue(bs, sps.max_transform_hierarchy_depth_intra);

    put_ui(bs, sps.scaling_list_enabled_flag, 1);
    if (sps.scaling_list_enabled_flag) {
        put_ui(bs, sps.sps_scaling_list_data_present_flag, 1);
        if (sps.sps_scaling_list_data_present_flag) {
            //scaling_list_data();
        }
    }

    put_ui(bs, sps.amp_enabled_flag, 1);
    put_ui(bs, sps.sample_adaptive_offset_enabled_flag, 1);

    put_ui(bs, sps.pcm_enabled_flag, 1);
    if (sps.pcm_enabled_flag) {
        put_ui(bs, sps.pcm_sample_bit_depth_luma_minus1, 4);
        put_ui(bs, sps.pcm_sample_bit_depth_chroma_minus1, 4);
        put_ue(bs, sps.log2_min_pcm_luma_coding_block_size_minus3);
        put_ue(bs, sps.log2_diff_max_min_pcm_luma_coding_block_size);
        put_ui(bs, sps.pcm_loop_filter_disabled_flag, 1);
    }

    put_ue(bs, sps.num_short_term_ref_pic_sets);
    for (i = 0; i < sps.num_short_term_ref_pic_sets; i++) {
        pack_short_term_ref_pic_setp(bs, &sps.strp[i], i == 0);
    }

    // long_term_ref_pics_present_flag is set as 0 in fill_sps_header() for now
    put_ui(bs, sps.long_term_ref_pics_present_flag, 1);
    if (sps.long_term_ref_pics_present_flag) {
        put_ue(bs, sps.num_long_term_ref_pics_sps);
        for (i = 0; i < sps.num_long_term_ref_pics_sps; i++) {
            /* lt_ref_pic_poc_lsb_sps is a fixed-length u(v) field of
             * log2_max_pic_order_cnt_lsb_minus4 + 4 bits (the same width
             * as pic_order_cnt_lsb), not an Exp-Golomb code. */
            put_ui(bs, sps.lt_ref_pic_poc_lsb_sps[i],
                   sps.log2_max_pic_order_cnt_lsb_minus4 + 4);
            put_ui(bs, sps.used_by_curr_pic_lt_sps_flag[i], 1);
        }
    }

    put_ui(bs, sps.sps_temporal_mvp_enabled_flag, 1);
    put_ui(bs, sps.strong_intra_smoothing_enabled_flag, 1);

    // vui_parameters_present_flag is set as 0 in fill_sps_header() for now
    put_ui(bs, sps.vui_parameters_present_flag, 1);

    put_ui(bs, sps.sps_extension_present_flag, 1);
}
1180
/*
 * Write the file-level `pps` (picture parameter set) to bs, following the
 * field order of the H.265 pic_parameter_set_rbsp() syntax (7.3.2.3),
 * including the range extension when pps_range_extension_flag is set.
 *
 * Scaling-list data is not serialised; only its presence flag is written.
 */
static void pps_rbsp(bitstream *bs)
{
    uint32_t i = 0;

    put_ue(bs, pps.pps_pic_parameter_set_id);
    put_ue(bs, pps.pps_seq_parameter_set_id);
    put_ui(bs, pps.dependent_slice_segments_enabled_flag, 1);
    put_ui(bs, pps.output_flag_present_flag, 1);
    put_ui(bs, pps.num_extra_slice_header_bits, 3);
    put_ui(bs, pps.sign_data_hiding_enabled_flag, 1);
    put_ui(bs, pps.cabac_init_present_flag, 1);

    put_ue(bs, pps.num_ref_idx_l0_default_active_minus1);
    put_ue(bs, pps.num_ref_idx_l1_default_active_minus1);
    put_se(bs, pps.init_qp_minus26);

    put_ui(bs, pps.constrained_intra_pred_flag, 1);
    put_ui(bs, pps.transform_skip_enabled_flag, 1);

    put_ui(bs, pps.cu_qp_delta_enabled_flag, 1);
    if (pps.cu_qp_delta_enabled_flag) {
        put_ue(bs, pps.diff_cu_qp_delta_depth);
    }

    put_se(bs, pps.pps_cb_qp_offset);
    put_se(bs, pps.pps_cr_qp_offset);

    put_ui(bs, pps.pps_slice_chroma_qp_offsets_present_flag, 1);
    put_ui(bs, pps.weighted_pred_flag, 1);
    put_ui(bs, pps.weighted_bipred_flag, 1);
    put_ui(bs, pps.transquant_bypass_enabled_flag, 1);
    put_ui(bs, pps.tiles_enabled_flag, 1);
    put_ui(bs, pps.entropy_coding_sync_enabled_flag, 1);

    if (pps.tiles_enabled_flag) {
        put_ue(bs, pps.num_tile_columns_minus1);
        put_ue(bs, pps.num_tile_rows_minus1);
        put_ui(bs, pps.uniform_spacing_flag, 1);
        if (!pps.uniform_spacing_flag) {
            /* The last column/row size is implied, hence "< minus1". */
            for (i = 0; i < pps.num_tile_columns_minus1; i++) {
                put_ue(bs, pps.column_width_minus1[i]);
            }

            for (i = 0; i < pps.num_tile_rows_minus1; i++) {
                put_ue(bs, pps.row_height_minus1[i]);
            }
        }
        put_ui(bs, pps.loop_filter_across_tiles_enabled_flag, 1);
    }

    put_ui(bs, pps.pps_loop_filter_across_slices_enabled_flag, 1);
    put_ui(bs, pps.deblocking_filter_control_present_flag, 1);
    if (pps.deblocking_filter_control_present_flag) {
        put_ui(bs, pps.deblocking_filter_override_enabled_flag, 1);
        put_ui(bs, pps.pps_deblocking_filter_disabled_flag, 1);
        if (!pps.pps_deblocking_filter_disabled_flag) {
            put_se(bs, pps.pps_beta_offset_div2);
            put_se(bs, pps.pps_tc_offset_div2);
        }
    }

    // pps_scaling_list_data_present_flag is set as 0 in fill_pps_header() for now
    put_ui(bs, pps.pps_scaling_list_data_present_flag, 1);
    if (pps.pps_scaling_list_data_present_flag) {
        //scaling_list_data();
    }

    put_ui(bs, pps.lists_modification_present_flag, 1);
    put_ue(bs, pps.log2_parallel_merge_level_minus2);
    put_ui(bs, pps.slice_segment_header_extension_present_flag, 1);

    put_ui(bs, pps.pps_extension_present_flag, 1);
    if (pps.pps_extension_present_flag) {
        put_ui(bs, pps.pps_range_extension_flag, 1);
        put_ui(bs, pps.pps_multilayer_extension_flag, 1);
        put_ui(bs, pps.pps_3d_extension_flag, 1);
        /* pps_extension_5bits occupies five bits, u(5). */
        put_ui(bs, pps.pps_extension_5bits, 5);
    }

    if (pps.pps_range_extension_flag) {
        if (pps.transform_skip_enabled_flag)
            put_ue(bs, pps.log2_max_transform_skip_block_size_minus2);
        put_ui(bs, pps.cross_component_prediction_enabled_flag, 1);
        put_ui(bs, pps.chroma_qp_offset_list_enabled_flag, 1);

        if (pps.chroma_qp_offset_list_enabled_flag) {
            put_ue(bs, pps.diff_cu_chroma_qp_offset_depth);
            put_ue(bs, pps.chroma_qp_offset_list_len_minus1);
            for (i = 0; i <= pps.chroma_qp_offset_list_len_minus1; i++) {
                /* The list entries are signed offsets, se(v). */
                put_se(bs, pps.cb_qp_offset_list[i]);
                put_se(bs, pps.cr_qp_offset_list[i]);
            }
        }

        put_ue(bs, pps.log2_sao_offset_scale_luma);
        put_ue(bs, pps.log2_sao_offset_scale_chroma);
    }
}
/*
 * Write a slice segment header to bs, following the field order of the
 * H.265 slice_segment_header() syntax (7.3.6.1).
 *
 * bs:           destination bitstream.
 * slice_header: values to serialise.
 * sps, pps:     parameter sets whose flags decide which fields are present.
 * isidr:        not used; the slice is treated as IDR whenever
 *               slice_header->pic_order_cnt_lsb is 0.
 *
 * NOTE(review): the caller derives the NAL unit type from the picture
 * parameter buffer's idr_pic_flag, while this function derives it from the
 * POC LSB; confirm the two can never disagree.
 * NOTE(review): no pred_weight_table() is written, so the output is
 * presumably only valid while weighted prediction is disabled - confirm.
 */
static void sliceHeader_rbsp(
    bitstream *bs,
    struct SliceHeader *slice_header,
    struct SeqParamSet *sps,
    struct PicParamSet *pps,
    int isidr)
{
    uint8_t nal_unit_type = NALU_TRAIL_R;
    int gop_ref_distance = ip_period;
    int i = 0;

    put_ui(bs, slice_header->first_slice_segment_in_pic_flag, 1);
    if (slice_header->pic_order_cnt_lsb == 0)
        nal_unit_type = NALU_IDR_W_DLP;

    /* NAL unit types 16..23 carry no_output_of_prior_pics_flag. */
    if (nal_unit_type >= 16 && nal_unit_type <= 23)
        put_ui(bs, slice_header->no_output_of_prior_pics_flag, 1);

    put_ue(bs, slice_header->slice_pic_parameter_set_id);

    if (!slice_header->first_slice_segment_in_pic_flag) {
        /* Presence of dependent_slice_segment_flag is decided by the PPS,
         * not by the flag's own value: a 0 must still be written when
         * dependent slice segments are enabled. */
        if (pps->dependent_slice_segments_enabled_flag) {
            put_ui(bs, slice_header->dependent_slice_segment_flag, 1);
        }

        /* slice_segment_address uses Ceil(Log2(picture size in CTUs)) bits. */
        put_ui(bs, slice_header->slice_segment_address,
               (uint8_t)(ceil(log(slice_header->picture_height_in_ctus * slice_header->picture_width_in_ctus) / log(2.0))));
    }
    if (!slice_header->dependent_slice_segment_flag) {
        for (i = 0; i < pps->num_extra_slice_header_bits; i++) {
            put_ui(bs, slice_header->slice_reserved_undetermined_flag[i], 1);
        }
        put_ue(bs, slice_header->slice_type);
        if (pps->output_flag_present_flag) {
            put_ui(bs, slice_header->pic_output_flag, 1);
        }
        if (sps->separate_colour_plane_flag == 1) {
            put_ui(bs, slice_header->colour_plane_id, 2);
        }

        if (!(nal_unit_type == NALU_IDR_W_DLP || nal_unit_type == NALU_IDR_N_LP)) {
            put_ui(bs, slice_header->pic_order_cnt_lsb, (sps->log2_max_pic_order_cnt_lsb_minus4 + 4));
            put_ui(bs, slice_header->short_term_ref_pic_set_sps_flag, 1);

            if (!slice_header->short_term_ref_pic_set_sps_flag) {
                // refer to Teddi
                if (sps->num_short_term_ref_pic_sets > 0)
                    put_ui(bs, 0, 1); // inter_ref_pic_set_prediction_flag, always 0 for now

                put_ue(bs, slice_header->strp.num_negative_pics);
                put_ue(bs, slice_header->strp.num_positive_pics);

                // The two 'for' blocks below follow both Teddi and mv_encoder.
                // They are kept in this shape so that they stay easy to update
                // when the Teddi side changes. According to Teddi, these are
                // the CModel implementation.
                int prev = 0;
                int frame_cnt_in_gop = slice_header->pic_order_cnt_lsb / 2;
                // this is the first big 'for' block
                for (i = 0; i < slice_header->strp.num_negative_pics; i++) {
                    // Low Delay B case
                    if (1 == gop_ref_distance) {
                        put_ue(bs, 0 /*delta_poc_s0_minus1*/);
                    } else {
                        // For Non-BPyramid GOP i.e B0 type
                        if (num_active_ref_p > 1) {
                            // DeltaPOC Equals NumB
                            int DeltaPoc = -(int)(gop_ref_distance);
                            put_ue(bs, prev - DeltaPoc - 1 /*delta_poc_s0_minus1*/);
                        } else {
                            // The enclosing 'if' is
                            //    if (!slice_header->short_term_ref_pic_set_sps_flag)
                            // From the Teddi logic, short_term_ref_pic_set_sps_flag can only
                            // be '0' for B-Pyramid, or for the first several frames of a GOP
                            // in multi-ref cases when there are not enough backward refs.
                            // Teddi has code under this 'else', but it is deliberately not
                            // introduced in MEA to avoid confusion; the assert guards
                            // against a new case that would need handling in the future.
                            assert(0);
                        }
                    }
                    put_ui(bs, 1 /*used_by_curr_pic_s0_flag*/, 1);
                }

                prev = 0;
                // this is the second big 'for' block
                for (i = 0; i < slice_header->strp.num_positive_pics; i++) {
                    // Non-BPyramid GOP
                    if (num_active_ref_p > 1) {
                        // MultiRef Case
                        // NOTE(review): nothing is written for delta_poc_s1_minus1
                        // when frame_cnt_in_gop == gop_ref_distance - confirm intent.
                        if (frame_cnt_in_gop < gop_ref_distance) {
                            int DeltaPoc = (int)(gop_ref_distance - frame_cnt_in_gop);
                            put_ue(bs, DeltaPoc - prev - 1 /*delta_poc_s1_minus1*/);
                        } else if (frame_cnt_in_gop > gop_ref_distance) {
                            int DeltaPoc = (int)(gop_ref_distance * slice_header->strp.num_negative_pics - frame_cnt_in_gop);
                            put_ue(bs, DeltaPoc - prev - 1 /*delta_poc_s1_minus1*/);
                        }
                    } else {
                        // Same reasoning as the assert in the first 'for' block:
                        // this path is not expected with the GOP structures
                        // supported here.
                        assert(0);
                    }
                    put_ui(bs, 1 /*used_by_curr_pic_s1_flag*/, 1);
                }
            } else if (sps->num_short_term_ref_pic_sets > 1) {
                /* short_term_ref_pic_set_idx uses
                 * Ceil(Log2(num_short_term_ref_pic_sets)) bits. */
                put_ui(bs, slice_header->short_term_ref_pic_set_idx,
                       (uint8_t)(ceil(log(sps->num_short_term_ref_pic_sets) / log(2.0))));
            }

            if (sps->long_term_ref_pics_present_flag) {
                if (sps->num_long_term_ref_pics_sps > 0)
                    put_ue(bs, slice_header->num_long_term_sps);

                put_ue(bs, slice_header->num_long_term_pics);
            }

            if (sps->sps_temporal_mvp_enabled_flag)
                put_ui(bs, slice_header->slice_temporal_mvp_enabled_flag, 1);
        }

        if (sps->sample_adaptive_offset_enabled_flag) {
            put_ui(bs, slice_header->slice_sao_luma_flag, 1);
            put_ui(bs, slice_header->slice_sao_chroma_flag, 1);
        }

        if (slice_header->slice_type != SLICE_I) {
            put_ui(bs, slice_header->num_ref_idx_active_override_flag, 1);

            if (slice_header->num_ref_idx_active_override_flag) {
                put_ue(bs, slice_header->num_ref_idx_l0_active_minus1);
                if (slice_header->slice_type == SLICE_B)
                    put_ue(bs, slice_header->num_ref_idx_l1_active_minus1);
            }

            if (pps->lists_modification_present_flag && slice_header->num_poc_total_cur > 1) {
                /* ref_pic_list_modification */
                put_ui(bs, slice_header->ref_pic_list_modification_flag_l0, 1);

                if (slice_header->ref_pic_list_modification_flag_l0) {
                    for (i = 0; i <= slice_header->num_ref_idx_l0_active_minus1; i++) {
                        put_ui(bs, slice_header->list_entry_l0[i],
                               (uint8_t)(ceil(log(slice_header->num_poc_total_cur) / log(2.0))));
                    }
                }

                /* The list-1 part of ref_pic_lists_modification() exists
                 * only in B slices. */
                if (slice_header->slice_type == SLICE_B) {
                    put_ui(bs, slice_header->ref_pic_list_modification_flag_l1, 1);

                    if (slice_header->ref_pic_list_modification_flag_l1) {
                        for (i = 0; i <= slice_header->num_ref_idx_l1_active_minus1; i++) {
                            put_ui(bs, slice_header->list_entry_l1[i],
                                   (uint8_t)(ceil(log(slice_header->num_poc_total_cur) / log(2.0))));
                        }
                    }
                }
            }

            if (slice_header->slice_type == SLICE_B) {
                put_ui(bs, slice_header->mvd_l1_zero_flag, 1);
            }

            if (pps->cabac_init_present_flag) {
                put_ui(bs, slice_header->cabac_init_present_flag, 1);
            }

            if (slice_header->slice_temporal_mvp_enabled_flag) {
                /* collocated_from_l0_flag is only coded for B slices and
                 * counts as 1 otherwise. */
                int collocated_from_l0_flag = 1;

                if (slice_header->slice_type == SLICE_B) {
                    collocated_from_l0_flag = slice_header->collocated_from_l0_flag;
                    put_ui(bs, slice_header->collocated_from_l0_flag, 1);
                }

                if (((collocated_from_l0_flag && (slice_header->num_ref_idx_l0_active_minus1 > 0)) ||
                     (!collocated_from_l0_flag && (slice_header->num_ref_idx_l1_active_minus1 > 0)))) {
                    put_ue(bs, slice_header->collocated_ref_idx);
                }
            }

            put_ue(bs, slice_header->five_minus_max_num_merge_cand);
        }

        put_se(bs, slice_header->slice_qp_delta);

        /* The slice-level chroma QP offsets are present when the PPS sets
         * pps_slice_chroma_qp_offsets_present_flag. */
        if (pps->pps_slice_chroma_qp_offsets_present_flag) {
            put_se(bs, slice_header->slice_qp_delta_cb);
            put_se(bs, slice_header->slice_qp_delta_cr);
        }

        /* chroma_qp_offset_list_enabled_flag gates a different field:
         * cu_chroma_qp_offset_enabled_flag, always written as 0 here. */
        if (pps->chroma_qp_offset_list_enabled_flag) {
            put_ui(bs, 0 /*cu_chroma_qp_offset_enabled_flag*/, 1);
        }

        if (pps->deblocking_filter_override_enabled_flag) {
            put_ui(bs, slice_header->deblocking_filter_override_flag, 1);
        }
        if (slice_header->deblocking_filter_override_flag) {
            put_ui(bs, slice_header->disable_deblocking_filter_flag, 1);

            if (!slice_header->disable_deblocking_filter_flag) {
                put_se(bs, slice_header->beta_offset_div2);
                put_se(bs, slice_header->tc_offset_div2);
            }
        }

        if (pps->pps_loop_filter_across_slices_enabled_flag &&
            (slice_header->slice_sao_luma_flag || slice_header->slice_sao_chroma_flag ||
             !slice_header->disable_deblocking_filter_flag)) {
            put_ui(bs, slice_header->slice_loop_filter_across_slices_enabled_flag, 1);
        }
    }

    if ((pps->tiles_enabled_flag) || (pps->entropy_coding_sync_enabled_flag)) {
        put_ue(bs, slice_header->num_entry_point_offsets);

        if (slice_header->num_entry_point_offsets > 0) {
            put_ue(bs, slice_header->offset_len_minus1);
        }
    }

    if (pps->slice_segment_header_extension_present_flag) {
        /* An empty extension: only its zero length is written. */
        int slice_header_extension_length = 0;

        put_ue(bs, slice_header_extension_length);
    }
}
1508
1509 static int
build_packed_pic_buffer(unsigned char ** header_buffer)1510 build_packed_pic_buffer(unsigned char **header_buffer)
1511 {
1512 bitstream bs;
1513
1514 bitstream_start(&bs);
1515 nal_start_code_prefix(&bs, NALU_PPS);
1516 nal_header(&bs, NALU_PPS);
1517 pps_rbsp(&bs);
1518 rbsp_trailing_bits(&bs);
1519 bitstream_end(&bs);
1520
1521 *header_buffer = (unsigned char *)bs.buffer;
1522 return bs.bit_offset;
1523 }
1524 static int
build_packed_video_buffer(unsigned char ** header_buffer)1525 build_packed_video_buffer(unsigned char **header_buffer)
1526 {
1527 bitstream bs;
1528
1529 bitstream_start(&bs);
1530 nal_start_code_prefix(&bs, NALU_VPS);
1531 nal_header(&bs, NALU_VPS);
1532 vps_rbsp(&bs);
1533 rbsp_trailing_bits(&bs);
1534 bitstream_end(&bs);
1535
1536 *header_buffer = (unsigned char *)bs.buffer;
1537 return bs.bit_offset;
1538 }
1539
1540 static int
build_packed_seq_buffer(unsigned char ** header_buffer)1541 build_packed_seq_buffer(unsigned char **header_buffer)
1542 {
1543 bitstream bs;
1544
1545 bitstream_start(&bs);
1546 nal_start_code_prefix(&bs, NALU_SPS);
1547 nal_header(&bs, NALU_SPS);
1548 sps_rbsp(&bs);
1549 rbsp_trailing_bits(&bs);
1550 bitstream_end(&bs);
1551
1552 *header_buffer = (unsigned char *)bs.buffer;
1553 return bs.bit_offset;
1554 }
1555
build_packed_slice_buffer(unsigned char ** header_buffer)1556 static int build_packed_slice_buffer(unsigned char **header_buffer)
1557 {
1558 bitstream bs;
1559 int is_idr = !!pic_param.pic_fields.bits.idr_pic_flag;
1560 int naluType = is_idr ? NALU_IDR_W_DLP : NALU_TRAIL_R;
1561
1562 bitstream_start(&bs);
1563 nal_start_code_prefix(&bs, NALU_TRAIL_R);
1564 nal_header(&bs, naluType);
1565 sliceHeader_rbsp(&bs, &ssh, &sps, &pps, 0);
1566 rbsp_trailing_bits(&bs);
1567 bitstream_end(&bs);
1568
1569 *header_buffer = (unsigned char *)bs.buffer;
1570 return bs.bit_offset;
1571 }
1572
1573
/*
 * Helper function for profiling purposes.
 *
 * Returns the current gettimeofday() time in milliseconds, reduced modulo
 * UINT_MAX + 1, or 0 if gettimeofday() fails.  Only differences between
 * two readings are meaningful.
 */
static unsigned int GetTickCount(void)
{
    struct timeval tv;

    if (gettimeofday(&tv, NULL))
        return 0;

    /* Convert before multiplying: unsigned arithmetic wraps by definition,
     * whereas tv_sec * 1000 in a 32-bit signed time_t would overflow. */
    return (unsigned int)tv.tv_usec / 1000u + (unsigned int)tv.tv_sec * 1000u;
}
1584
1585 /*
1586 Assume frame sequence is: Frame#0,#1,#2,...,#M,...,#X,... (encoding order)
1587 1) period between Frame #X and Frame #N = #X - #N
1588 2) 0 means infinite for intra_period/intra_idr_period, and 0 is invalid for ip_period
1589 3) intra_idr_period % intra_period (intra_period > 0) and (intra_period -1)% ip_period must be 0
1590 4) intra_period and intra_idr_period take precedence over ip_period
1591 5) if ip_period > 1, intra_period and intra_idr_period are not the strict periods
     of I/IDR frames, see the examples below
1593 -------------------------------------------------------------------
1594 intra_period intra_idr_period ip_period frame sequence (intra_period/intra_idr_period/ip_period)
1595 0 ignored 1 IDRPPPPPPP ... (No IDR/I any more)
1596 0 ignored >=2 IDR(PBB)(PBB)... (No IDR/I any more)
1597 1 0 ignored IDRIIIIIII... (No IDR any more)
1598 1 1 ignored IDR IDR IDR IDR...
1599 1 >=2 ignored IDRII IDRII IDR... (1/3/ignore)
1600 >=2 0 1 IDRPPP IPPP I... (3/0/1)
1601 >=2 0 >=2 IDR(PBB)(PBB)(IBB) (7/0/3)
1602 (PBB)(IBB)(PBB)(IBB)...
1603 >=2 >=2 1 IDRPPPPP IPPPPP IPPPPP (7/14/1)
1604 IDRPPPPP IPPPPP IPPPPP...
1605 >=2 >=2 >=2 {IDR(PBB)(PBB)(IBB)(PBB)(IBB)(PBB)} (7/14/3)
1606 {IDR(PBB)(PBB)(IBB)(PBB)(IBB)(PBB)}...
1607 {IDR(PBB)(PBB)(IBB)(PBB)} (7/14/3)
1608 {IDR(PBB)(PBB)(IBB)(PBB)}...
1609 {IDR(PBB)(PBB)} (7/7/3)
1610 {IDR(PBB)(PBB)}.
1611 */
1612
1613 /*
1614 * Return displaying order with specified periods and encoding order
1615 * displaying_order: displaying order
1616 * frame_type: frame type
1617 */
encoding2display_order(unsigned long long encoding_order,int intra_period,int intra_idr_period,int ip_period,unsigned long long * displaying_order,int * frame_type)1618 void encoding2display_order(
1619 unsigned long long encoding_order, int intra_period,
1620 int intra_idr_period, int ip_period,
1621 unsigned long long *displaying_order,
1622 int *frame_type)
1623 {
1624 int encoding_order_gop = 0;
1625
1626 if (intra_period == 1) { /* all are I/IDR frames */
1627 *displaying_order = encoding_order;
1628 if (intra_idr_period == 0)
1629 *frame_type = (encoding_order == 0) ? FRAME_IDR : FRAME_I;
1630 else
1631 *frame_type = (encoding_order % intra_idr_period == 0) ? FRAME_IDR : FRAME_I;
1632 return;
1633 }
1634
1635 if (intra_period == 0)
1636 intra_idr_period = 0;
1637
1638 /* new sequence like
1639 * IDR PPPPP IPPPPP
1640 * IDR (PBB)(PBB)(IBB)(PBB)
1641 */
1642 encoding_order_gop = (intra_idr_period == 0) ? encoding_order :
1643 (encoding_order % (intra_idr_period + ((ip_period == 1) ? 0 : 1)));
1644
1645 if (encoding_order_gop == 0) { /* the first frame */
1646 *frame_type = FRAME_IDR;
1647 *displaying_order = encoding_order;
1648 } else if (((encoding_order_gop - 1) % ip_period) != 0) { /* B frames */
1649 *frame_type = FRAME_B;
1650 *displaying_order = encoding_order - 1;
1651 } else if ((intra_period != 0) && /* have I frames */
1652 (encoding_order_gop >= 2) &&
1653 ((ip_period == 1 && encoding_order_gop % (intra_period - 1) == 0) || /* for IDR PPPPP IPPPP */
1654 /* for IDR (PBB)(PBB)(IBB) */
1655 (ip_period >= 2 && ((encoding_order_gop - 1) / ip_period % ((intra_period - 1) / ip_period)) == 0))) {
1656 *frame_type = FRAME_I;
1657 *displaying_order = encoding_order + ip_period - 1;
1658 } else {
1659 *frame_type = FRAME_P;
1660 *displaying_order = encoding_order + ip_period - 1;
1661 }
1662
1663
1664 }
1665
1666
fourcc_to_string(int fourcc)1667 static char *fourcc_to_string(int fourcc)
1668 {
1669 switch (fourcc) {
1670 case VA_FOURCC_NV12:
1671 return "NV12";
1672 case VA_FOURCC_IYUV:
1673 return "IYUV";
1674 case VA_FOURCC_YV12:
1675 return "YV12";
1676 case VA_FOURCC_UYVY:
1677 return "UYVY";
1678 default:
1679 return "Unknown";
1680 }
1681 }
1682
string_to_fourcc(char * str)1683 static int string_to_fourcc(char *str)
1684 {
1685 int fourcc;
1686
1687 if (!strncmp(str, "NV12", 4))
1688 fourcc = VA_FOURCC_NV12;
1689 else if (!strncmp(str, "IYUV", 4))
1690 fourcc = VA_FOURCC_IYUV;
1691 else if (!strncmp(str, "YV12", 4))
1692 fourcc = VA_FOURCC_YV12;
1693 else if (!strncmp(str, "UYVY", 4))
1694 fourcc = VA_FOURCC_UYVY;
1695 else {
1696 printf("Unknow FOURCC\n");
1697 fourcc = -1;
1698 }
1699 return fourcc;
1700 }
1701
1702
rc_to_string(int rcmode)1703 static char *rc_to_string(int rcmode)
1704 {
1705 switch (rc_mode) {
1706 case VA_RC_NONE:
1707 return "NONE";
1708 case VA_RC_CBR:
1709 return "CBR";
1710 case VA_RC_VBR:
1711 return "VBR";
1712 case VA_RC_VCM:
1713 return "VCM";
1714 case VA_RC_CQP:
1715 return "CQP";
1716 case VA_RC_VBR_CONSTRAINED:
1717 return "VBR_CONSTRAINED";
1718 default:
1719 return "Unknown";
1720 }
1721 }
1722
string_to_rc(char * str)1723 static int string_to_rc(char *str)
1724 {
1725 int rc_mode;
1726
1727 if (!strncmp(str, "NONE", 4))
1728 rc_mode = VA_RC_NONE;
1729 else if (!strncmp(str, "CBR", 3))
1730 rc_mode = VA_RC_CBR;
1731 else if (!strncmp(str, "VBR", 3))
1732 rc_mode = VA_RC_VBR;
1733 else if (!strncmp(str, "VCM", 3))
1734 rc_mode = VA_RC_VCM;
1735 else if (!strncmp(str, "CQP", 3))
1736 rc_mode = VA_RC_CQP;
1737 else if (!strncmp(str, "VBR_CONSTRAINED", 15))
1738 rc_mode = VA_RC_VBR_CONSTRAINED;
1739 else {
1740 printf("Unknown RC mode\n");
1741 rc_mode = -1;
1742 }
1743 return rc_mode;
1744 }
1745
1746
/*
 * Print the command-line usage summary to stdout.
 *
 * Always returns 0.
 */
static int print_help(void)
{
    printf("./hevcencode <options>\n");
    printf(" -w <width> -h <height>\n");
    printf(" -framecount <frame number>\n");
    printf(" -n <frame number>\n");
    printf(" if set to 0 and srcyuv is set, the frame count is from srcyuv file\n");
    printf(" -o <coded file>\n");
    printf(" -f <frame rate>\n");
    printf(" --intra_period <number>\n");
    printf(" --idr_period <number>\n");
    printf(" --ip_period <number>\n");
    printf(" --bitrate <bitrate> Kbits per second\n");
    printf(" --initialqp <number>\n");
    printf(" --minqp <number>\n");
    /* The keyword accepted by string_to_rc() is VBR_CONSTRAINED. */
    printf(" --rcmode <NONE|CBR|VBR|VCM|CQP|VBR_CONSTRAINED>\n");
    printf(" --syncmode: sequentially upload source, encoding, save result, no multi-thread\n");
    printf(" --srcyuv <filename> load YUV from a file\n");
    printf(" --fourcc <NV12|IYUV|YV12> source YUV fourcc\n");
    printf(" --recyuv <filename> save reconstructed YUV into a file\n");
    printf(" --enablePSNR calculate PSNR of recyuv vs. srcyuv\n");
    printf(" --profile 1: main 2 : main10\n");
    printf(" --p2b 1: enable 0 : disable(default)\n");
    printf(" --lowpower 1: enable 0 : disable(default)\n");
    return 0;
}
1773
process_cmdline(int argc,char * argv[])1774 static int process_cmdline(int argc, char *argv[])
1775 {
1776 int c;
1777 const struct option long_opts[] = {
1778 {"help", no_argument, NULL, 0 },
1779 {"bitrate", required_argument, NULL, 1 },
1780 {"minqp", required_argument, NULL, 2 },
1781 {"initialqp", required_argument, NULL, 3 },
1782 {"intra_period", required_argument, NULL, 4 },
1783 {"idr_period", required_argument, NULL, 5 },
1784 {"ip_period", required_argument, NULL, 6 },
1785 {"rcmode", required_argument, NULL, 7 },
1786 {"srcyuv", required_argument, NULL, 9 },
1787 {"recyuv", required_argument, NULL, 10 },
1788 {"fourcc", required_argument, NULL, 11 },
1789 {"syncmode", no_argument, NULL, 12 },
1790 {"enablePSNR", no_argument, NULL, 13 },
1791 {"prit", required_argument, NULL, 14 },
1792 {"priv", required_argument, NULL, 15 },
1793 {"framecount", required_argument, NULL, 16 },
1794 {"profile", required_argument, NULL, 17 },
1795 {"p2b", required_argument, NULL, 18 },
1796 {"lowpower", required_argument, NULL, 19 },
1797 {NULL, no_argument, NULL, 0 }
1798 };
1799 int long_index;
1800
1801 while ((c = getopt_long_only(argc, argv, "w:h:n:f:o:?", long_opts, &long_index)) != EOF) {
1802 switch (c) {
1803 case 'w':
1804 frame_width = atoi(optarg);
1805 break;
1806 case 'h':
1807 frame_height = atoi(optarg);
1808 break;
1809 case 'n':
1810 case 16:
1811 frame_count = atoi(optarg);
1812 break;
1813 case 'f':
1814 frame_rate = atoi(optarg);
1815 break;
1816 case 'o':
1817 if (coded_fn)
1818 free(coded_fn);
1819 coded_fn = strdup(optarg);
1820 break;
1821 case 0:
1822 print_help();
1823 exit(0);
1824 case 1:
1825 frame_bitrate = atoi(optarg)*1000;
1826 break;
1827 case 2:
1828 minimal_qp = atoi(optarg);
1829 break;
1830 case 3:
1831 initial_qp = atoi(optarg);
1832 break;
1833 case 4:
1834 intra_period = atoi(optarg);
1835 break;
1836 case 5:
1837 intra_idr_period = atoi(optarg);
1838 break;
1839 case 6:
1840 ip_period = atoi(optarg);
1841 break;
1842 case 7:
1843 rc_mode = string_to_rc(optarg);
1844 if (rc_mode < 0) {
1845 print_help();
1846 exit(1);
1847 }
1848 break;
1849 case 9:
1850 if (srcyuv_fn)
1851 free(srcyuv_fn);
1852 srcyuv_fn = strdup(optarg);
1853 break;
1854 case 10:
1855 if (recyuv_fn)
1856 free(recyuv_fn);
1857 recyuv_fn = strdup(optarg);
1858 break;
1859 case 11:
1860 srcyuv_fourcc = string_to_fourcc(optarg);
1861 if (srcyuv_fourcc <= 0) {
1862 print_help();
1863 exit(1);
1864 }
1865 break;
1866 case 12:
1867 encode_syncmode = 1;
1868 break;
1869 case 13:
1870 calc_psnr = 1;
1871 break;
1872 case 14:
1873 misc_priv_type = strtol(optarg, NULL, 0);
1874 break;
1875 case 15:
1876 misc_priv_value = strtol(optarg, NULL, 0);
1877 break;
1878 case 17:
1879 if (strncmp(optarg, "1", 1) == 0) {
1880 real_hevc_profile = 1;
1881 hevc_profile = VAProfileHEVCMain;
1882 } else if (strncmp(optarg, "2", 1) == 0) {
1883 real_hevc_profile = 2;
1884 hevc_profile = VAProfileHEVCMain10;
1885 } else
1886 hevc_profile = 0;
1887 break;
1888 case 18:
1889 p2b = atoi(optarg);
1890 break;
1891 case 19:
1892 lowpower = atoi(optarg);
1893 break;
1894
1895 case ':':
1896 case '?':
1897 print_help();
1898 exit(0);
1899 }
1900 }
1901
1902 if (ip_period < 1) {
1903 printf(" ip_period must be greater than 0\n");
1904 exit(0);
1905 }
1906 if (intra_period != 1 && (intra_period - 1) % ip_period != 0) {
1907 printf(" intra_period -1 must be a multiplier of ip_period\n");
1908 exit(0);
1909 }
1910 if (intra_period != 0 && intra_idr_period % intra_period != 0) {
1911 printf(" intra_idr_period must be a multiplier of intra_period\n");
1912 exit(0);
1913 }
1914 if (ip_period > 1) {
1915 frame_count -= (frame_count - 1) % ip_period;
1916 }
1917
1918 if (frame_bitrate == 0)
1919 frame_bitrate = (long long int) frame_width * frame_height * 12 * frame_rate / 50;
1920
1921 /* open source file */
1922 if (srcyuv_fn) {
1923 srcyuv_fp = fopen(srcyuv_fn, "r");
1924
1925 if (srcyuv_fp == NULL)
1926 printf("Open source YUV file %s failed, use auto-generated YUV data\n", srcyuv_fn);
1927 else {
1928 struct stat tmp;
1929
1930 int ret = fstat(fileno(srcyuv_fp), &tmp);
1931 CHECK_CONDITION(ret == 0);
1932 srcyuv_frames = tmp.st_size / (frame_width * frame_height * 1.5);
1933 printf("Source YUV file %s with %llu frames\n", srcyuv_fn, srcyuv_frames);
1934
1935 if (frame_count == 0)
1936 frame_count = srcyuv_frames;
1937 }
1938 }
1939
1940 /* open source file */
1941 if (recyuv_fn) {
1942 recyuv_fp = fopen(recyuv_fn, "w+");
1943
1944 if (recyuv_fp == NULL)
1945 printf("Open reconstructed YUV file %s failed\n", recyuv_fn);
1946 }
1947
1948 if (coded_fn == NULL) {
1949 struct stat buf;
1950 if (stat("/tmp", &buf) == 0)
1951 coded_fn = strdup("/tmp/test.265");
1952 else if (stat("/sdcard", &buf) == 0)
1953 coded_fn = strdup("/sdcard/test.265");
1954 else
1955 coded_fn = strdup("./test.265");
1956 }
1957
1958 /* store coded data into a file */
1959 if (coded_fn) {
1960 coded_fp = fopen(coded_fn, "w+");
1961 } else {
1962 printf("Copy file string failed");
1963 exit(1);
1964 }
1965 if (coded_fp == NULL) {
1966 printf("Open file %s failed, exit\n", coded_fn);
1967 exit(1);
1968 }
1969
1970 frame_width_aligned = (frame_width + 63) & (~63);
1971 frame_height_aligned = (frame_height + 63) & (~63);
1972 if (frame_width != frame_width_aligned ||
1973 frame_height != frame_height_aligned) {
1974 printf("Source frame is %dx%d and will code clip to %dx%d with crop\n",
1975 frame_width, frame_height,
1976 frame_width_aligned, frame_height_aligned
1977 );
1978 }
1979
1980 return 0;
1981 }
1982
init_va(void)1983 static int init_va(void)
1984 {
1985 VAProfile profile_list[] = {VAProfileHEVCMain, VAProfileHEVCMain10};
1986 VAEntrypoint *entrypoints;
1987 int num_entrypoints, slice_entrypoint;
1988 int support_encode = 0;
1989 int major_ver, minor_ver;
1990 VAStatus va_status;
1991 unsigned int i;
1992
1993 va_dpy = va_open_display();
1994 va_status = vaInitialize(va_dpy, &major_ver, &minor_ver);
1995 CHECK_VASTATUS(va_status, "vaInitialize");
1996
1997 num_entrypoints = vaMaxNumEntrypoints(va_dpy);
1998 entrypoints = malloc(num_entrypoints * sizeof(*entrypoints));
1999 if (!entrypoints) {
2000 fprintf(stderr, "error: failed to initialize VA entrypoints array\n");
2001 exit(1);
2002 }
2003
2004 /* use the highest profile */
2005 for (i = 0; i < sizeof(profile_list) / sizeof(profile_list[0]); i++) {
2006 if ((hevc_profile != ~0) && hevc_profile != profile_list[i])
2007 continue;
2008
2009 hevc_profile = profile_list[i];
2010 vaQueryConfigEntrypoints(va_dpy, hevc_profile, entrypoints, &num_entrypoints);
2011 for (slice_entrypoint = 0; slice_entrypoint < num_entrypoints; slice_entrypoint++) {
2012 if (entrypoints[slice_entrypoint] == VAEntrypointEncSlice ||
2013 entrypoints[slice_entrypoint] == VAEntrypointEncSliceLP ) {
2014 support_encode = 1;
2015 break;
2016 }
2017 }
2018 if (support_encode == 1)
2019 break;
2020 }
2021
2022 if (support_encode == 0) {
2023 printf("Can't find VAEntrypointEncSlice for HEVC profiles\n");
2024 exit(1);
2025 } else {
2026 switch (hevc_profile) {
2027 case VAProfileHEVCMain:
2028 hevc_profile = VAProfileHEVCMain;
2029 printf("Use profile VAProfileHEVCMain\n");
2030 break;
2031
2032 case VAProfileHEVCMain10:
2033 hevc_profile = VAProfileHEVCMain10;
2034 printf("Use profile VAProfileHEVCMain10\n");
2035 break;
2036 default:
2037 printf("unknow profile. Set to Main");
2038 hevc_profile = VAProfileHEVCMain;
2039 constraint_set_flag |= (1 << 0 | 1 << 1); /* Annex A.2.1 & A.2.2 */
2040 ip_period = 1;
2041 break;
2042 }
2043 }
2044
2045 /* find out the format for the render target, and rate control mode */
2046 for (i = 0; i < VAConfigAttribTypeMax; i++)
2047 attrib[i].type = i;
2048
2049 if (lowpower)
2050 {
2051 entryPoint = VAEntrypointEncSliceLP;
2052 LCU_SIZE = 64;
2053 }
2054
2055 va_status = vaGetConfigAttributes(va_dpy, hevc_profile, entryPoint,
2056 &attrib[0], VAConfigAttribTypeMax);
2057 CHECK_VASTATUS(va_status, "vaGetConfigAttributes");
2058 /* check the interested configattrib */
2059 if ((attrib[VAConfigAttribRTFormat].value & VA_RT_FORMAT_YUV420) == 0) {
2060 printf("Not find desired YUV420 RT format\n");
2061 exit(1);
2062 } else {
2063 config_attrib[config_attrib_num].type = VAConfigAttribRTFormat;
2064 config_attrib[config_attrib_num].value = VA_RT_FORMAT_YUV420;
2065 config_attrib_num++;
2066 }
2067
2068 if (attrib[VAConfigAttribRateControl].value != VA_ATTRIB_NOT_SUPPORTED) {
2069 int tmp = attrib[VAConfigAttribRateControl].value;
2070
2071 printf("Support rate control mode (0x%x):", tmp);
2072
2073 if (tmp & VA_RC_NONE)
2074 printf("NONE ");
2075 if (tmp & VA_RC_CBR)
2076 printf("CBR ");
2077 if (tmp & VA_RC_VBR)
2078 printf("VBR ");
2079 if (tmp & VA_RC_VCM)
2080 printf("VCM ");
2081 if (tmp & VA_RC_CQP)
2082 printf("CQP ");
2083 if (tmp & VA_RC_VBR_CONSTRAINED)
2084 printf("VBR_CONSTRAINED ");
2085
2086 printf("\n");
2087
2088 if (rc_mode == -1 || !(rc_mode & tmp)) {
2089 if (rc_mode != -1) {
2090 printf("Warning: Don't support the specified RateControl mode: %s!!!, switch to ", rc_to_string(rc_mode));
2091 }
2092
2093 for (i = 0; i < sizeof(rc_default_modes) / sizeof(rc_default_modes[0]); i++) {
2094 if (rc_default_modes[i] & tmp) {
2095 rc_mode = rc_default_modes[i];
2096 break;
2097 }
2098 }
2099
2100 printf("RateControl mode: %s\n", rc_to_string(rc_mode));
2101 }
2102
2103 config_attrib[config_attrib_num].type = VAConfigAttribRateControl;
2104 config_attrib[config_attrib_num].value = rc_mode;
2105 config_attrib_num++;
2106 }
2107
2108
2109 if (attrib[VAConfigAttribEncPackedHeaders].value != VA_ATTRIB_NOT_SUPPORTED) {
2110 int tmp = attrib[VAConfigAttribEncPackedHeaders].value;
2111
2112 printf("Support VAConfigAttribEncPackedHeaders\n");
2113
2114 hevc_packedheader = 1;
2115 config_attrib[config_attrib_num].type = VAConfigAttribEncPackedHeaders;
2116 config_attrib[config_attrib_num].value = VA_ENC_PACKED_HEADER_NONE;
2117
2118 if (tmp & VA_ENC_PACKED_HEADER_SEQUENCE) {
2119 printf("Support packed sequence headers\n");
2120 config_attrib[config_attrib_num].value |= VA_ENC_PACKED_HEADER_SEQUENCE;
2121 }
2122
2123 if (tmp & VA_ENC_PACKED_HEADER_PICTURE) {
2124 printf("Support packed picture headers\n");
2125 config_attrib[config_attrib_num].value |= VA_ENC_PACKED_HEADER_PICTURE;
2126 }
2127
2128 if (tmp & VA_ENC_PACKED_HEADER_SLICE) {
2129 printf("Support packed slice headers\n");
2130 config_attrib[config_attrib_num].value |= VA_ENC_PACKED_HEADER_SLICE;
2131 }
2132
2133 if (tmp & VA_ENC_PACKED_HEADER_MISC) {
2134 printf("Support packed misc headers\n");
2135 config_attrib[config_attrib_num].value |= VA_ENC_PACKED_HEADER_MISC;
2136 }
2137
2138 enc_packed_header_idx = config_attrib_num;
2139 config_attrib_num++;
2140 }
2141
2142 if (attrib[VAConfigAttribEncInterlaced].value != VA_ATTRIB_NOT_SUPPORTED) {
2143 int tmp = attrib[VAConfigAttribEncInterlaced].value;
2144
2145 printf("Support VAConfigAttribEncInterlaced\n");
2146
2147 if (tmp & VA_ENC_INTERLACED_FRAME)
2148 printf("support VA_ENC_INTERLACED_FRAME\n");
2149 if (tmp & VA_ENC_INTERLACED_FIELD)
2150 printf("Support VA_ENC_INTERLACED_FIELD\n");
2151 if (tmp & VA_ENC_INTERLACED_MBAFF)
2152 printf("Support VA_ENC_INTERLACED_MBAFF\n");
2153 if (tmp & VA_ENC_INTERLACED_PAFF)
2154 printf("Support VA_ENC_INTERLACED_PAFF\n");
2155
2156 config_attrib[config_attrib_num].type = VAConfigAttribEncInterlaced;
2157 config_attrib[config_attrib_num].value = VA_ENC_PACKED_HEADER_NONE;
2158 config_attrib_num++;
2159 }
2160
2161 if (attrib[VAConfigAttribEncMaxRefFrames].value != VA_ATTRIB_NOT_SUPPORTED) {
2162 hevc_maxref = attrib[VAConfigAttribEncMaxRefFrames].value;
2163
2164 printf("Support %d RefPicList0 and %d RefPicList1\n",
2165 hevc_maxref & 0xffff, (hevc_maxref >> 16) & 0xffff);
2166 }
2167
2168 if (attrib[VAConfigAttribEncMaxSlices].value != VA_ATTRIB_NOT_SUPPORTED)
2169 printf("Support %d slices\n", attrib[VAConfigAttribEncMaxSlices].value);
2170
2171 if (attrib[VAConfigAttribEncSliceStructure].value != VA_ATTRIB_NOT_SUPPORTED) {
2172 int tmp = attrib[VAConfigAttribEncSliceStructure].value;
2173
2174 printf("Support VAConfigAttribEncSliceStructure\n");
2175
2176 if (tmp & VA_ENC_SLICE_STRUCTURE_ARBITRARY_ROWS)
2177 printf("Support VA_ENC_SLICE_STRUCTURE_ARBITRARY_ROWS\n");
2178 if (tmp & VA_ENC_SLICE_STRUCTURE_POWER_OF_TWO_ROWS)
2179 printf("Support VA_ENC_SLICE_STRUCTURE_POWER_OF_TWO_ROWS\n");
2180 if (tmp & VA_ENC_SLICE_STRUCTURE_ARBITRARY_MACROBLOCKS)
2181 printf("Support VA_ENC_SLICE_STRUCTURE_ARBITRARY_MACROBLOCKS\n");
2182 }
2183 if (attrib[VAConfigAttribEncMacroblockInfo].value != VA_ATTRIB_NOT_SUPPORTED) {
2184 printf("Support VAConfigAttribEncMacroblockInfo\n");
2185 }
2186 if (attrib[VAConfigAttribEncHEVCBlockSizes].value != VA_ATTRIB_NOT_SUPPORTED) {
2187 printf("Support VAConfigAttribEncHEVCBlockSizes\n");
2188 uint32_t tmp = attrib[VAConfigAttribEncHEVCBlockSizes].value;
2189 VAConfigAttribValEncHEVCBlockSizes bs = { .value = tmp };
2190 block_sizes.log2_max_coding_tree_block_size_minus3 = bs.bits.log2_max_coding_tree_block_size_minus3;
2191 block_sizes.log2_min_coding_tree_block_size_minus3 = bs.bits.log2_min_coding_tree_block_size_minus3;
2192 block_sizes.log2_min_luma_coding_block_size_minus3 = bs.bits.log2_min_luma_coding_block_size_minus3;
2193 block_sizes.log2_max_luma_transform_block_size_minus2 = bs.bits.log2_max_luma_transform_block_size_minus2;
2194 block_sizes.log2_min_luma_transform_block_size_minus2 = bs.bits.log2_min_luma_transform_block_size_minus2;
2195 block_sizes.log2_max_pcm_coding_block_size_minus3 = bs.bits.log2_max_pcm_coding_block_size_minus3;
2196 block_sizes.log2_min_pcm_coding_block_size_minus3 = bs.bits.log2_min_pcm_coding_block_size_minus3;
2197 block_sizes.max_max_transform_hierarchy_depth_inter = bs.bits.max_max_transform_hierarchy_depth_inter;
2198 block_sizes.min_max_transform_hierarchy_depth_inter = bs.bits.min_max_transform_hierarchy_depth_inter;
2199 block_sizes.max_max_transform_hierarchy_depth_intra = bs.bits.max_max_transform_hierarchy_depth_intra;
2200 block_sizes.min_max_transform_hierarchy_depth_intra = bs.bits.min_max_transform_hierarchy_depth_intra;
2201
2202 use_block_sizes = 1;
2203 config_attrib[config_attrib_num].type = VAConfigAttribEncHEVCBlockSizes;
2204 config_attrib[config_attrib_num].value = tmp;
2205 config_attrib_num++;
2206 }
2207 if (attrib[VAConfigAttribEncHEVCFeatures].value != VA_ATTRIB_NOT_SUPPORTED) {
2208 printf("Support VAConfigAttribEncHEVCFeatures\n");
2209 uint32_t tmp = attrib[VAConfigAttribEncHEVCFeatures].value;
2210 VAConfigAttribValEncHEVCFeatures f = { .value = tmp };
2211 features.amp = f.bits.amp;
2212 features.constrained_intra_pred = f.bits.constrained_intra_pred;
2213 features.cu_qp_delta = f.bits.cu_qp_delta;
2214 features.deblocking_filter_disable = f.bits.deblocking_filter_disable;
2215 features.dependent_slices = f.bits.dependent_slices;
2216 features.pcm = f.bits.pcm;
2217 features.sao = f.bits.sao;
2218 features.scaling_lists = f.bits.scaling_lists;
2219 features.separate_colour_planes = f.bits.separate_colour_planes;
2220 features.sign_data_hiding = f.bits.sign_data_hiding;
2221 features.strong_intra_smoothing = f.bits.strong_intra_smoothing;
2222 features.temporal_mvp = f.bits.temporal_mvp;
2223 features.transform_skip = f.bits.transform_skip;
2224 features.transquant_bypass = f.bits.transquant_bypass;
2225 features.weighted_prediction = f.bits.weighted_prediction;
2226
2227 use_features = 1;
2228 config_attrib[config_attrib_num].type = VAConfigAttribEncHEVCFeatures;
2229 config_attrib[config_attrib_num].value = attrib[VAConfigAttribEncHEVCFeatures].value;
2230 config_attrib_num++;
2231 }
2232
2233 free(entrypoints);
2234 return 0;
2235 }
2236
setup_encode()2237 static int setup_encode()
2238 {
2239 VAStatus va_status;
2240 VASurfaceID *tmp_surfaceid;
2241 int codedbuf_size, i;
2242
2243 va_status = vaCreateConfig(va_dpy, hevc_profile, entryPoint,
2244 &config_attrib[0], config_attrib_num, &config_id);
2245 CHECK_VASTATUS(va_status, "vaCreateConfig");
2246
2247 /* create source surfaces */
2248 va_status = vaCreateSurfaces(va_dpy,
2249 VA_RT_FORMAT_YUV420, frame_width_aligned, frame_height_aligned,
2250 &src_surface[0], SURFACE_NUM,
2251 NULL, 0);
2252 CHECK_VASTATUS(va_status, "vaCreateSurfaces");
2253
2254 /* create reference surfaces */
2255 va_status = vaCreateSurfaces(
2256 va_dpy,
2257 VA_RT_FORMAT_YUV420, frame_width_aligned, frame_height_aligned,
2258 &ref_surface[0], SURFACE_NUM,
2259 NULL, 0
2260 );
2261 CHECK_VASTATUS(va_status, "vaCreateSurfaces");
2262
2263 tmp_surfaceid = calloc(2 * SURFACE_NUM, sizeof(VASurfaceID));
2264 if (tmp_surfaceid) {
2265 memcpy(tmp_surfaceid, src_surface, SURFACE_NUM * sizeof(VASurfaceID));
2266 memcpy(tmp_surfaceid + SURFACE_NUM, ref_surface, SURFACE_NUM * sizeof(VASurfaceID));
2267 }
2268
2269 /* Create a context for this encode pipe */
2270 va_status = vaCreateContext(va_dpy, config_id,
2271 frame_width_aligned, frame_height_aligned,
2272 VA_PROGRESSIVE,
2273 tmp_surfaceid, 2 * SURFACE_NUM,
2274 &context_id);
2275 CHECK_VASTATUS(va_status, "vaCreateContext");
2276 free(tmp_surfaceid);
2277
2278 codedbuf_size = ((long long int) frame_width_aligned * frame_height_aligned * 400) / (16 * 16);
2279
2280 for (i = 0; i < SURFACE_NUM; i++) {
2281 /* create coded buffer once for all
2282 * other VA buffers which won't be used again after vaRenderPicture.
2283 * so APP can always vaCreateBuffer for every frame
2284 * but coded buffer need to be mapped and accessed after vaRenderPicture/vaEndPicture
2285 * so VA won't maintain the coded buffer
2286 */
2287 va_status = vaCreateBuffer(va_dpy, context_id, VAEncCodedBufferType,
2288 codedbuf_size, 1, NULL, &coded_buf[i]);
2289 CHECK_VASTATUS(va_status, "vaCreateBuffer");
2290 }
2291
2292 return 0;
2293 }
2294
2295
2296
/*
 * partition(ref, field, key, ascending)
 *
 * In-place partition step shared by sort_one() and sort_two().
 *
 * The macro is not self-contained: the expanding scope must provide the
 * variables `i` and `j` (the current left and right scan indices) and `tmp`
 * (a scratch element of the same type as ref[0]); all three are modified.
 *
 * ref:       array being partitioned
 * field:     member of each element that is compared against key
 * key:       pivot value; expanded several times, so it should be free of
 *            side effects
 * ascending: non-zero moves elements smaller than key to the left, zero
 *            moves elements larger than key to the left
 *
 * The loop ends once i > j.
 *
 * NOTE(review): the inner scans only stop on an element that fails the
 * comparison with key; the macro itself does not bound i or j against the
 * array limits, so callers rely on such an element existing.
 */
#define partition(ref, field, key, ascending) \
    while (i <= j) { \
        if (ascending) { \
            while (ref[i].field < key) \
                i++; \
            while (ref[j].field > key) \
                j--; \
        } else { \
            while (ref[i].field > key) \
                i++; \
            while (ref[j].field < key) \
                j--; \
        } \
        if (i <= j) { \
            tmp = ref[i]; \
            ref[i] = ref[j]; \
            ref[j] = tmp; \
            i++; \
            j--; \
        } \
    } \
2318
/*
 * Recursively sort ref[left..right] (both bounds inclusive) by
 * pic_order_cnt.
 *
 * ascending: non-zero sorts from smallest to largest pic_order_cnt, zero
 *            sorts from largest to smallest
 *
 * The element in the middle of the range supplies the pivot value.
 */
static void sort_one(VAPictureHEVC ref[], int left, int right,
                     int ascending)
{
    /* i, j and tmp are used by name inside the partition() macro */
    int i = left;
    int j = right;
    VAPictureHEVC tmp;
    int mid = (left + right) / 2;
    unsigned int key = ref[mid].pic_order_cnt;

    partition(ref, pic_order_cnt, (signed int)key, ascending);

    /* sort what is left on either side of the crossing point */
    if (j > left)
        sort_one(ref, left, j, ascending);

    if (right > i)
        sort_one(ref, i, right, ascending);
}
2334
/*
 * Split ref[left..right] around the pic_order_cnt value `key`, then sort
 * the two resulting parts independently.
 *
 * partition_ascending: direction used for the split (see partition())
 * list0_ascending:     sort direction for the left part
 * list1_ascending:     sort direction for the right part
 */
static void sort_two(VAPictureHEVC ref[], int left, int right, unsigned int key,
                     int partition_ascending, int list0_ascending, int list1_ascending)
{
    /* i, j and tmp are used by name inside the partition() macro */
    int i = left;
    int j = right;
    VAPictureHEVC tmp;
    int left_part_end;
    int right_part_start;

    partition(ref, pic_order_cnt, (signed int)key, partition_ascending);

    left_part_end = i - 1;
    right_part_start = j + 1;

    /* the left part is sorted first, then the right part */
    sort_one(ref, left, left_part_end, list0_ascending);
    sort_one(ref, right_part_start, right, list1_ascending);
}
2346
update_ReferenceFrames(void)2347 static int update_ReferenceFrames(void)
2348 {
2349 int i;
2350
2351 if (current_frame_type == FRAME_B)
2352 return 0;
2353
2354 numShortTerm++;
2355 if (numShortTerm > num_ref_frames)
2356 numShortTerm = num_ref_frames;
2357 for (i = numShortTerm - 1; i > 0; i--)
2358 ReferenceFrames[i] = ReferenceFrames[i - 1];
2359 ReferenceFrames[0] = CurrentCurrPic;
2360
2361 return 0;
2362 }
2363
update_RefPicList(void)2364 static int update_RefPicList(void)
2365 {
2366 unsigned int current_poc = CurrentCurrPic.pic_order_cnt;
2367
2368 if (current_frame_type == FRAME_P) {
2369 memcpy(RefPicList0_P, ReferenceFrames, numShortTerm * sizeof(VAPictureHEVC));
2370 sort_one(RefPicList0_P, 0, numShortTerm - 1, 0);
2371 }
2372
2373 if (current_frame_type == FRAME_B) {
2374 memcpy(RefPicList0_B, ReferenceFrames, numShortTerm * sizeof(VAPictureHEVC));
2375 sort_two(RefPicList0_B, 0, numShortTerm - 1, current_poc, 1, 0, 1);
2376
2377 memcpy(RefPicList1_B, ReferenceFrames, numShortTerm * sizeof(VAPictureHEVC));
2378 sort_two(RefPicList1_B, 0, numShortTerm - 1, current_poc, 0, 1, 0);
2379 }
2380
2381 return 0;
2382 }
2383
2384
render_sequence(struct SeqParamSet * sps)2385 static int render_sequence(struct SeqParamSet *sps)
2386 {
2387
2388 VABufferID seq_param_buf = VA_INVALID_ID;
2389 VABufferID rc_param_buf = VA_INVALID_ID;
2390 VABufferID misc_param_tmpbuf = VA_INVALID_ID;
2391 VABufferID render_id[2] = {VA_INVALID_ID};
2392 VAStatus va_status;
2393 VAEncMiscParameterBuffer *misc_param, *misc_param_tmp;
2394 VAEncMiscParameterRateControl *misc_rate_ctrl;
2395 seq_param.general_profile_idc = sps->ptps.general_profile_idc;
2396 seq_param.general_level_idc = sps->ptps.general_level_idc;
2397 seq_param.general_tier_flag = (uint8_t)(sps->ptps.general_tier_flag);
2398
2399 seq_param.intra_period = intra_period;
2400 seq_param.intra_idr_period = intra_idr_period;
2401 seq_param.ip_period = ip_period;
2402
2403 seq_param.bits_per_second = frame_bitrate;
2404 seq_param.pic_width_in_luma_samples = sps->pic_width_in_luma_samples;
2405 seq_param.pic_height_in_luma_samples = sps->pic_height_in_luma_samples;
2406
2407 seq_param.seq_fields.bits.chroma_format_idc = 1;
2408 seq_param.seq_fields.bits.separate_colour_plane_flag = 0;
2409 seq_param.seq_fields.bits.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8;
2410 seq_param.seq_fields.bits.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8;
2411 seq_param.seq_fields.bits.scaling_list_enabled_flag = sps->scaling_list_enabled_flag;
2412 seq_param.seq_fields.bits.strong_intra_smoothing_enabled_flag = sps->strong_intra_smoothing_enabled_flag;
2413 seq_param.seq_fields.bits.amp_enabled_flag = sps->amp_enabled_flag;
2414 seq_param.seq_fields.bits.sample_adaptive_offset_enabled_flag = sps->sample_adaptive_offset_enabled_flag;
2415 seq_param.seq_fields.bits.pcm_enabled_flag = sps->pcm_enabled_flag;
2416 seq_param.seq_fields.bits.pcm_loop_filter_disabled_flag = sps->pcm_loop_filter_disabled_flag;
2417 seq_param.seq_fields.bits.sps_temporal_mvp_enabled_flag = sps->sps_temporal_mvp_enabled_flag;
2418
2419 seq_param.log2_min_luma_coding_block_size_minus3 = sps->log2_min_luma_coding_block_size_minus3;
2420 seq_param.log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_luma_coding_block_size;
2421 seq_param.log2_min_transform_block_size_minus2 = sps->log2_min_luma_transform_block_size_minus2;
2422 seq_param.log2_diff_max_min_transform_block_size = sps->log2_diff_max_min_luma_transform_block_size;
2423 seq_param.max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter;
2424 seq_param.max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra;
2425
2426 seq_param.vui_parameters_present_flag = sps->vui_parameters_present_flag;
2427
2428 va_status = vaCreateBuffer(va_dpy, context_id,
2429 VAEncSequenceParameterBufferType,
2430 sizeof(seq_param), 1, &seq_param, &seq_param_buf);
2431 CHECK_VASTATUS(va_status, "vaCreateBuffer");
2432
2433 va_status = vaCreateBuffer(va_dpy, context_id,
2434 VAEncMiscParameterBufferType,
2435 sizeof(VAEncMiscParameterBuffer) + sizeof(VAEncMiscParameterRateControl),
2436 1, NULL, &rc_param_buf);
2437 CHECK_VASTATUS(va_status, "vaCreateBuffer");
2438
2439 vaMapBuffer(va_dpy, rc_param_buf, (void **)&misc_param);
2440 misc_param->type = VAEncMiscParameterTypeRateControl;
2441 misc_rate_ctrl = (VAEncMiscParameterRateControl *)misc_param->data;
2442 memset(misc_rate_ctrl, 0, sizeof(*misc_rate_ctrl));
2443 misc_rate_ctrl->bits_per_second = frame_bitrate;
2444 misc_rate_ctrl->target_percentage = 66;
2445 misc_rate_ctrl->window_size = 1000;
2446 misc_rate_ctrl->initial_qp = initial_qp;
2447 misc_rate_ctrl->min_qp = minimal_qp;
2448 misc_rate_ctrl->basic_unit_size = 0;
2449 vaUnmapBuffer(va_dpy, rc_param_buf);
2450
2451 render_id[0] = seq_param_buf;
2452 render_id[1] = rc_param_buf;
2453
2454 va_status = vaRenderPicture(va_dpy, context_id, &render_id[0], 2);
2455 CHECK_VASTATUS(va_status, "vaRenderPicture");
2456 if (seq_param_buf != VA_INVALID_ID) {
2457 vaDestroyBuffer(va_dpy, seq_param_buf);
2458 seq_param_buf = VA_INVALID_ID;
2459 }
2460
2461 if (rc_param_buf != VA_INVALID_ID) {
2462 vaDestroyBuffer(va_dpy, rc_param_buf);
2463 rc_param_buf = VA_INVALID_ID;
2464 }
2465
2466
2467 if (misc_priv_type != 0) {
2468 va_status = vaCreateBuffer(va_dpy, context_id,
2469 VAEncMiscParameterBufferType,
2470 sizeof(VAEncMiscParameterBuffer),
2471 1, NULL, &misc_param_tmpbuf);
2472 CHECK_VASTATUS(va_status, "vaCreateBuffer");
2473 vaMapBuffer(va_dpy, misc_param_tmpbuf, (void **)&misc_param_tmp);
2474 misc_param_tmp->type = misc_priv_type;
2475 misc_param_tmp->data[0] = misc_priv_value;
2476 vaUnmapBuffer(va_dpy, misc_param_tmpbuf);
2477
2478 va_status = vaRenderPicture(va_dpy, context_id, &misc_param_tmpbuf, 1);
2479 }
2480
2481 return 0;
2482 }
2483
render_picture(struct PicParamSet * pps)2484 static int render_picture(struct PicParamSet *pps)
2485 {
2486 VABufferID pic_param_buf = VA_INVALID_ID;
2487 VAStatus va_status;
2488 int i = 0;
2489
2490 memcpy(pic_param.reference_frames, ReferenceFrames, numShortTerm * sizeof(VAPictureHEVC));
2491 for (i = numShortTerm; i < SURFACE_NUM - 1; i++) {
2492 pic_param.reference_frames[i].picture_id = VA_INVALID_SURFACE;
2493 pic_param.reference_frames[i].flags = VA_PICTURE_HEVC_INVALID;
2494 }
2495
2496 pic_param.last_picture = 0;
2497 pic_param.last_picture |= ((current_frame_encoding + 1) % intra_period == 0) ? HEVC_LAST_PICTURE_EOSEQ : 0;
2498 pic_param.last_picture |= ((current_frame_encoding + 1) == frame_count) ? HEVC_LAST_PICTURE_EOSTREAM : 0;
2499 pic_param.coded_buf = coded_buf[current_slot];
2500
2501 pic_param.decoded_curr_pic.picture_id = ref_surface[current_slot];
2502 pic_param.decoded_curr_pic.pic_order_cnt = calc_poc((current_frame_display - current_IDR_display) % MaxPicOrderCntLsb) * 2;
2503 pic_param.decoded_curr_pic.flags = 0;
2504 CurrentCurrPic = pic_param.decoded_curr_pic;
2505
2506 pic_param.collocated_ref_pic_index = pps->num_ref_idx_l0_default_active_minus1;
2507 pic_param.pic_init_qp = pps->init_qp_minus26 + 26;
2508 pic_param.diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth;
2509 pic_param.pps_cb_qp_offset = pps->pps_cb_qp_offset;
2510 pic_param.pps_cr_qp_offset = pps->pps_cr_qp_offset;
2511
2512 pic_param.num_tile_columns_minus1 = pps->num_tile_columns_minus1;
2513 pic_param.num_tile_rows_minus1 = pps->num_tile_rows_minus1;
2514 for (i = 0; i <= (unsigned int)(pic_param.num_tile_columns_minus1); i++) {
2515 pic_param.column_width_minus1[i] = 0;
2516 }
2517 for (i = 0; i <= (unsigned int)(pic_param.num_tile_rows_minus1); i++) {
2518 pic_param.row_height_minus1[i] = 0;
2519 }
2520
2521 pic_param.log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level_minus2;
2522 pic_param.ctu_max_bitsize_allowed = 0;
2523 pic_param.num_ref_idx_l0_default_active_minus1 = pps->num_ref_idx_l0_default_active_minus1;
2524 pic_param.num_ref_idx_l1_default_active_minus1 = pps->num_ref_idx_l1_default_active_minus1;
2525 pic_param.slice_pic_parameter_set_id = 0;
2526 pic_param.pic_fields.bits.idr_pic_flag = (current_frame_type == FRAME_IDR);
2527 pic_param.pic_fields.bits.coding_type = current_frame_type == FRAME_IDR ? FRAME_I : current_frame_type;
2528 pic_param.pic_fields.bits.reference_pic_flag = current_frame_type != FRAME_B ? 1 : 0;
2529 pic_param.pic_fields.bits.dependent_slice_segments_enabled_flag = pps->dependent_slice_segments_enabled_flag;
2530 pic_param.pic_fields.bits.sign_data_hiding_enabled_flag = pps->sign_data_hiding_enabled_flag;
2531 pic_param.pic_fields.bits.constrained_intra_pred_flag = pps->constrained_intra_pred_flag;
2532 pic_param.pic_fields.bits.transform_skip_enabled_flag = pps->transform_skip_enabled_flag;
2533 pic_param.pic_fields.bits.cu_qp_delta_enabled_flag = pps->cu_qp_delta_enabled_flag;
2534 pic_param.pic_fields.bits.weighted_pred_flag = pps->weighted_pred_flag;
2535 pic_param.pic_fields.bits.weighted_bipred_flag = pps->weighted_bipred_flag;
2536 pic_param.pic_fields.bits.transquant_bypass_enabled_flag = pps->transquant_bypass_enabled_flag;
2537 pic_param.pic_fields.bits.tiles_enabled_flag = pps->tiles_enabled_flag;
2538 pic_param.pic_fields.bits.entropy_coding_sync_enabled_flag = pps->entropy_coding_sync_enabled_flag;
2539 pic_param.pic_fields.bits.loop_filter_across_tiles_enabled_flag = pps->loop_filter_across_tiles_enabled_flag;
2540 pic_param.pic_fields.bits.pps_loop_filter_across_slices_enabled_flag = pps->pps_loop_filter_across_slices_enabled_flag;
2541 pic_param.pic_fields.bits.scaling_list_data_present_flag = pps->pps_scaling_list_data_present_flag;
2542
2543 va_status = vaCreateBuffer(va_dpy, context_id, VAEncPictureParameterBufferType,
2544 sizeof(pic_param), 1, &pic_param, &pic_param_buf);
2545 CHECK_VASTATUS(va_status, "vaCreateBuffer");;
2546
2547 va_status = vaRenderPicture(va_dpy, context_id, &pic_param_buf, 1);
2548 CHECK_VASTATUS(va_status, "vaRenderPicture");
2549
2550 if (pic_param_buf != VA_INVALID_ID) {
2551 vaDestroyBuffer(va_dpy, pic_param_buf);
2552 pic_param_buf = VA_INVALID_ID;
2553 }
2554
2555 return 0;
2556 }
2557
render_packedvideo(void)2558 static int render_packedvideo(void)
2559 {
2560
2561 VAEncPackedHeaderParameterBuffer packedheader_param_buffer;
2562 VABufferID packedvideo_para_bufid = VA_INVALID_ID;
2563 VABufferID packedvideo_data_bufid = VA_INVALID_ID;
2564 VABufferID render_id[2] = {VA_INVALID_ID};
2565 unsigned int length_in_bits;
2566 unsigned char *packedvideo_buffer = NULL;
2567 VAStatus va_status;
2568
2569 length_in_bits = build_packed_video_buffer(&packedvideo_buffer);
2570
2571 packedheader_param_buffer.type = VAEncPackedHeaderSequence;
2572
2573 packedheader_param_buffer.bit_length = length_in_bits; /*length_in_bits*/
2574 packedheader_param_buffer.has_emulation_bytes = 0;
2575 va_status = vaCreateBuffer(va_dpy,
2576 context_id,
2577 VAEncPackedHeaderParameterBufferType,
2578 sizeof(packedheader_param_buffer), 1, &packedheader_param_buffer,
2579 &packedvideo_para_bufid);
2580 CHECK_VASTATUS(va_status, "vaCreateBuffer");
2581
2582 va_status = vaCreateBuffer(va_dpy,
2583 context_id,
2584 VAEncPackedHeaderDataBufferType,
2585 (length_in_bits + 7) / 8, 1, packedvideo_buffer,
2586 &packedvideo_data_bufid);
2587 CHECK_VASTATUS(va_status, "vaCreateBuffer");
2588
2589 render_id[0] = packedvideo_para_bufid;
2590 render_id[1] = packedvideo_data_bufid;
2591 va_status = vaRenderPicture(va_dpy, context_id, render_id, 2);
2592 CHECK_VASTATUS(va_status, "vaRenderPicture");
2593
2594 free(packedvideo_buffer);
2595
2596 if (packedvideo_para_bufid != VA_INVALID_ID) {
2597 vaDestroyBuffer(va_dpy, packedvideo_para_bufid);
2598 packedvideo_para_bufid = VA_INVALID_ID;
2599 }
2600 if (packedvideo_data_bufid != VA_INVALID_ID) {
2601 vaDestroyBuffer(va_dpy, packedvideo_data_bufid);
2602 packedvideo_data_bufid = VA_INVALID_ID;
2603 }
2604
2605 return 0;
2606 }
2607
render_packedsequence(void)2608 static int render_packedsequence(void)
2609 {
2610 VAEncPackedHeaderParameterBuffer packedheader_param_buffer;
2611 VABufferID packedseq_para_bufid = VA_INVALID_ID;
2612 VABufferID packedseq_data_bufid = VA_INVALID_ID;
2613 VABufferID render_id[2] = {VA_INVALID_ID};
2614 unsigned int length_in_bits;
2615 unsigned char *packedseq_buffer = NULL;
2616 VAStatus va_status;
2617
2618 length_in_bits = build_packed_seq_buffer(&packedseq_buffer);
2619
2620 packedheader_param_buffer.type = VAEncPackedHeaderSequence;
2621
2622 packedheader_param_buffer.bit_length = length_in_bits; /*length_in_bits*/
2623 packedheader_param_buffer.has_emulation_bytes = 0;
2624 va_status = vaCreateBuffer(va_dpy,
2625 context_id,
2626 VAEncPackedHeaderParameterBufferType,
2627 sizeof(packedheader_param_buffer), 1, &packedheader_param_buffer,
2628 &packedseq_para_bufid);
2629 CHECK_VASTATUS(va_status, "vaCreateBuffer");
2630
2631 va_status = vaCreateBuffer(va_dpy,
2632 context_id,
2633 VAEncPackedHeaderDataBufferType,
2634 (length_in_bits + 7) / 8, 1, packedseq_buffer,
2635 &packedseq_data_bufid);
2636 CHECK_VASTATUS(va_status, "vaCreateBuffer");
2637
2638 render_id[0] = packedseq_para_bufid;
2639 render_id[1] = packedseq_data_bufid;
2640 va_status = vaRenderPicture(va_dpy, context_id, render_id, 2);
2641 CHECK_VASTATUS(va_status, "vaRenderPicture");
2642
2643 free(packedseq_buffer);
2644
2645 if (packedseq_para_bufid != VA_INVALID_ID) {
2646 vaDestroyBuffer(va_dpy, packedseq_para_bufid);
2647 packedseq_para_bufid = VA_INVALID_ID;
2648 }
2649 if (packedseq_data_bufid != VA_INVALID_ID) {
2650 vaDestroyBuffer(va_dpy, packedseq_data_bufid);
2651 packedseq_para_bufid = VA_INVALID_ID;
2652 }
2653
2654 return 0;
2655 }
2656
2657
render_packedpicture(void)2658 static int render_packedpicture(void)
2659 {
2660 VAEncPackedHeaderParameterBuffer packedheader_param_buffer;
2661 VABufferID packedpic_para_bufid = VA_INVALID_ID;
2662 VABufferID packedpic_data_bufid = VA_INVALID_ID;
2663 VABufferID render_id[2] = {VA_INVALID_ID};
2664 unsigned int length_in_bits;
2665 unsigned char *packedpic_buffer = NULL;
2666 VAStatus va_status;
2667
2668 length_in_bits = build_packed_pic_buffer(&packedpic_buffer);
2669 packedheader_param_buffer.type = VAEncPackedHeaderPicture;
2670 packedheader_param_buffer.bit_length = length_in_bits;
2671 packedheader_param_buffer.has_emulation_bytes = 0;
2672
2673 va_status = vaCreateBuffer(va_dpy,
2674 context_id,
2675 VAEncPackedHeaderParameterBufferType,
2676 sizeof(packedheader_param_buffer), 1, &packedheader_param_buffer,
2677 &packedpic_para_bufid);
2678 CHECK_VASTATUS(va_status, "vaCreateBuffer");
2679
2680 va_status = vaCreateBuffer(va_dpy,
2681 context_id,
2682 VAEncPackedHeaderDataBufferType,
2683 (length_in_bits + 7) / 8, 1, packedpic_buffer,
2684 &packedpic_data_bufid);
2685 CHECK_VASTATUS(va_status, "vaCreateBuffer");
2686
2687 render_id[0] = packedpic_para_bufid;
2688 render_id[1] = packedpic_data_bufid;
2689 va_status = vaRenderPicture(va_dpy, context_id, render_id, 2);
2690 CHECK_VASTATUS(va_status, "vaRenderPicture");
2691
2692 free(packedpic_buffer);
2693
2694 if (packedpic_para_bufid != VA_INVALID_ID) {
2695 vaDestroyBuffer(va_dpy, packedpic_para_bufid);
2696 packedpic_para_bufid = VA_INVALID_ID;
2697 }
2698 if (packedpic_data_bufid != VA_INVALID_ID) {
2699 vaDestroyBuffer(va_dpy, packedpic_data_bufid);
2700 packedpic_para_bufid = VA_INVALID_ID;
2701 }
2702
2703 return 0;
2704 }
2705
render_packedslice()2706 static void render_packedslice()
2707 {
2708 VAEncPackedHeaderParameterBuffer packedheader_param_buffer;
2709 VABufferID packedslice_para_bufid = VA_INVALID_ID;
2710 VABufferID packedslice_data_bufid = VA_INVALID_ID;
2711 VABufferID render_id[2] = {VA_INVALID_ID};
2712 unsigned int length_in_bits;
2713 unsigned char *packedslice_buffer = NULL;
2714 VAStatus va_status;
2715
2716 length_in_bits = build_packed_slice_buffer(&packedslice_buffer);
2717 packedheader_param_buffer.type = VAEncPackedHeaderSlice;
2718 packedheader_param_buffer.bit_length = length_in_bits;
2719 packedheader_param_buffer.has_emulation_bytes = 0;
2720
2721 va_status = vaCreateBuffer(va_dpy,
2722 context_id,
2723 VAEncPackedHeaderParameterBufferType,
2724 sizeof(packedheader_param_buffer), 1, &packedheader_param_buffer,
2725 &packedslice_para_bufid);
2726 CHECK_VASTATUS(va_status, "vaCreateBuffer");
2727
2728 va_status = vaCreateBuffer(va_dpy,
2729 context_id,
2730 VAEncPackedHeaderDataBufferType,
2731 (length_in_bits + 7) / 8, 1, packedslice_buffer,
2732 &packedslice_data_bufid);
2733 CHECK_VASTATUS(va_status, "vaCreateBuffer");
2734
2735 render_id[0] = packedslice_para_bufid;
2736 render_id[1] = packedslice_data_bufid;
2737 va_status = vaRenderPicture(va_dpy, context_id, render_id, 2);
2738 CHECK_VASTATUS(va_status, "vaRenderPicture");
2739
2740 free(packedslice_buffer);
2741
2742 if (packedslice_para_bufid != VA_INVALID_ID) {
2743 vaDestroyBuffer(va_dpy, packedslice_para_bufid);
2744 packedslice_para_bufid = VA_INVALID_ID;
2745 }
2746 if (packedslice_data_bufid != VA_INVALID_ID) {
2747 vaDestroyBuffer(va_dpy, packedslice_data_bufid);
2748 packedslice_para_bufid = VA_INVALID_ID;
2749 }
2750 }
2751
render_slice(void)2752 static int render_slice(void)
2753 {
2754 VABufferID slice_param_buf = VA_INVALID_ID;
2755 VAStatus va_status;
2756 memset(&slice_param, 0x00, sizeof(VAEncSliceParameterBufferHEVC));
2757
2758 update_RefPicList();
2759
2760 slice_param.slice_segment_address = 0;
2761 slice_param.num_ctu_in_slice = ssh.picture_width_in_ctus * ssh.picture_height_in_ctus;
2762 slice_param.slice_type = ssh.slice_type;
2763 slice_param.slice_pic_parameter_set_id = ssh.slice_pic_parameter_set_id; // right???
2764
2765 slice_param.num_ref_idx_l0_active_minus1 = ssh.num_ref_idx_l0_active_minus1;
2766 slice_param.num_ref_idx_l1_active_minus1 = ssh.num_ref_idx_l1_active_minus1;
2767 memset(slice_param.ref_pic_list0, 0xff, sizeof(slice_param.ref_pic_list0));
2768 memset(slice_param.ref_pic_list1, 0xff, sizeof(slice_param.ref_pic_list1));
2769
2770 if (current_frame_type == FRAME_P) {
2771 memcpy(slice_param.ref_pic_list0, RefPicList0_P, sizeof(VAPictureHEVC));
2772 if (p2b) {
2773 memcpy(slice_param.ref_pic_list1, RefPicList0_P, sizeof(VAPictureHEVC));
2774 }
2775 } else if (current_frame_type == FRAME_B) {
2776 memcpy(slice_param.ref_pic_list0, RefPicList0_B, sizeof(VAPictureHEVC));
2777 memcpy(slice_param.ref_pic_list1, RefPicList1_B, sizeof(VAPictureHEVC));
2778 }
2779
2780 slice_param.luma_log2_weight_denom = 0;
2781 slice_param.delta_chroma_log2_weight_denom = 0;
2782
2783 slice_param.max_num_merge_cand = 5 - ssh.five_minus_max_num_merge_cand;
2784
2785 slice_param.slice_qp_delta = ssh.slice_qp_delta;
2786 slice_param.slice_cb_qp_offset = 0;
2787 slice_param.slice_cr_qp_offset = 0;
2788 slice_param.slice_beta_offset_div2 = ssh.beta_offset_div2;
2789 slice_param.slice_tc_offset_div2 = ssh.tc_offset_div2;
2790
2791 slice_param.slice_fields.bits.dependent_slice_segment_flag = 0;
2792 slice_param.slice_fields.bits.colour_plane_id = ssh.colour_plane_id;
2793 slice_param.slice_fields.bits.slice_temporal_mvp_enabled_flag = ssh.slice_temporal_mvp_enabled_flag;
2794 slice_param.slice_fields.bits.slice_sao_luma_flag = ssh.slice_sao_luma_flag;
2795 slice_param.slice_fields.bits.slice_sao_chroma_flag = ssh.slice_sao_luma_flag;
2796 slice_param.slice_fields.bits.num_ref_idx_active_override_flag = ssh.num_ref_idx_active_override_flag;
2797 slice_param.slice_fields.bits.mvd_l1_zero_flag = 0;
2798 slice_param.slice_fields.bits.cabac_init_flag = 0;
2799 slice_param.slice_fields.bits.slice_deblocking_filter_disabled_flag = ssh.disable_deblocking_filter_flag;
2800 slice_param.slice_fields.bits.slice_loop_filter_across_slices_enabled_flag = ssh.slice_loop_filter_across_slices_enabled_flag;
2801 slice_param.slice_fields.bits.collocated_from_l0_flag = ssh.collocated_from_l0_flag;
2802
2803 if (hevc_packedheader &&
2804 config_attrib[enc_packed_header_idx].value & VA_ENC_PACKED_HEADER_SLICE)
2805 render_packedslice();
2806
2807 va_status = vaCreateBuffer(va_dpy, context_id, VAEncSliceParameterBufferType,
2808 sizeof(slice_param), 1, &slice_param, &slice_param_buf);
2809 CHECK_VASTATUS(va_status, "vaCreateBuffer");;
2810
2811 va_status = vaRenderPicture(va_dpy, context_id, &slice_param_buf, 1);
2812 CHECK_VASTATUS(va_status, "vaRenderPicture");
2813
2814 if (slice_param_buf != VA_INVALID_ID) {
2815 vaDestroyBuffer(va_dpy, slice_param_buf);
2816 slice_param_buf = VA_INVALID_ID;
2817 }
2818
2819 return 0;
2820 }
2821
2822
upload_source_YUV_once_for_all()2823 static int upload_source_YUV_once_for_all()
2824 {
2825 int box_width = 8;
2826 int row_shift = 0;
2827 int i;
2828
2829 for (i = 0; i < SURFACE_NUM; i++) {
2830 printf("\rLoading data into surface %d.....", i);
2831 upload_surface(va_dpy, src_surface[i], box_width, row_shift, 0);
2832
2833 row_shift++;
2834 if (row_shift == (2 * box_width)) row_shift = 0;
2835 }
2836 printf("Complete surface loading\n");
2837
2838 return 0;
2839 }
2840
/*
 * Upload one frame of the source YUV file into a VA surface.
 *
 * surface_id    - surface that receives the frame
 * display_order - frame index; it is wrapped modulo srcyuv_frames so more
 *                 frames can be encoded than the file contains
 *
 * The frame is mapped read-only with mmap().  Because a mapping offset has to
 * be page aligned, the mapping starts at the page boundary at or below the
 * frame and the frame is addressed at the remaining offset inside it.
 *
 * Returns 0 on success or when no source file is in use, 1 when the file
 * holds no frame or cannot be mapped.  An unsupported source fourcc
 * terminates the program.
 */
static int load_surface(VASurfaceID surface_id, unsigned long long display_order)
{
    unsigned char *srcyuv_ptr = NULL, *src_Y = NULL, *src_U = NULL, *src_V = NULL;
    unsigned long long frame_start, mmap_start, page_mask;
    void *mmap_ptr = NULL;
    long page_size;
    size_t mmap_size;
    int frame_size;

    if (srcyuv_fp == NULL)
        return 0;

    /* Avoid a modulo by zero when the file is shorter than one frame. */
    if (srcyuv_frames == 0) {
        printf("Source YUV file does not contain a complete frame\n");
        return 1;
    }

    /* allow encoding more than srcyuv_frames */
    display_order = display_order % srcyuv_frames;
    frame_size = frame_width * frame_height * 3 / 2; /* for YUV420 */
    frame_start = display_order * frame_size;

    /* Use the real page size; fall back to 4K if it cannot be queried. */
    page_size = sysconf(_SC_PAGESIZE);
    if (page_size <= 0)
        page_size = 0x1000;
    page_mask = (unsigned long long)page_size - 1;

    mmap_start = frame_start & ~page_mask;
    mmap_size = (size_t)((frame_size + (frame_start & page_mask) + page_mask) & ~page_mask);
    mmap_ptr = mmap(0, mmap_size, PROT_READ, MAP_SHARED,
                    fileno(srcyuv_fp), mmap_start);
    if (mmap_ptr == MAP_FAILED) {
        printf("Failed to mmap YUV file (%s)\n", strerror(errno));
        return 1;
    }

    srcyuv_ptr = (unsigned char *)mmap_ptr + (frame_start & page_mask);
    if (srcyuv_fourcc == VA_FOURCC_NV12) {
        /* one luma plane followed by a single chroma plane */
        src_Y = srcyuv_ptr;
        src_U = src_Y + frame_width * frame_height;
        src_V = NULL;
    } else if (srcyuv_fourcc == VA_FOURCC_IYUV ||
               srcyuv_fourcc == VA_FOURCC_YV12) {
        /* three planes; IYUV stores U before V, YV12 stores V before U */
        src_Y = srcyuv_ptr;
        if (srcyuv_fourcc == VA_FOURCC_IYUV) {
            src_U = src_Y + frame_width * frame_height;
            src_V = src_U + (frame_width / 2) * (frame_height / 2);
        } else { /* YV12 */
            src_V = src_Y + frame_width * frame_height;
            src_U = src_V + (frame_width / 2) * (frame_height / 2);
        }
    } else {
        printf("Unsupported source YUV format\n");
        exit(1);
    }

    upload_surface_yuv(va_dpy, surface_id,
                       srcyuv_fourcc, frame_width, frame_height,
                       src_Y, src_U, src_V);

    munmap(mmap_ptr, mmap_size);

    return 0;
}
2892
2893
/*
 * Download a reconstructed surface and store it in the reconstructed YUV
 * file at the position that corresponds to its display order.
 *
 * surface_id    - surface to download
 * display_order - frame index that selects the file offset
 * encode_order  - not used by this function
 *
 * The output uses the same fourcc as the source: NV12 is written as a luma
 * plane plus one chroma plane, IYUV as Y, U, V and YV12 as Y, V, U.
 *
 * Returns 0 on success or when no reconstructed file is in use, 1 when
 * seeking or writing fails (a message is printed).  An unsupported fourcc or
 * an allocation failure terminates the program.
 */
static int save_recyuv(VASurfaceID surface_id,
                       unsigned long long display_order,
                       unsigned long long encode_order)
{
    unsigned char *dst_Y = NULL, *dst_U = NULL, *dst_V = NULL;
    int quarter_size, y_size, uv_size;
    int is_nv12;
    int ret = 0;

    (void)encode_order;

    if (recyuv_fp == NULL)
        return 0;

    is_nv12 = (srcyuv_fourcc == VA_FOURCC_NV12);
    if (!is_nv12 &&
        srcyuv_fourcc != VA_FOURCC_IYUV &&
        srcyuv_fourcc != VA_FOURCC_YV12) {
        printf("Unsupported source YUV format\n");
        exit(1);
    }

    /*
     * Plane sizes are derived from the half-resolution chroma grid.  For NV12
     * the single chroma buffer is twice the size of one planar chroma plane.
     */
    quarter_size = (frame_width / 2) * (frame_height / 2);
    y_size = 4 * quarter_size;
    uv_size = is_nv12 ? 2 * quarter_size : quarter_size;

    dst_Y = calloc(1, y_size);
    if (dst_Y == NULL) {
        printf("Failed to allocate memory for dst_Y\n");
        exit(1);
    }

    dst_U = calloc(1, uv_size);
    if (dst_U == NULL) {
        printf("Failed to allocate memory for dst_U\n");
        exit(1);
    }

    if (!is_nv12) {
        dst_V = calloc(1, uv_size);
        if (dst_V == NULL) {
            printf("Failed to allocate memory for dst_V\n");
            exit(1);
        }
    }

    download_surface_yuv(va_dpy, surface_id,
                         srcyuv_fourcc, frame_width, frame_height,
                         dst_Y, dst_U, dst_V);

    if (fseek(recyuv_fp, (long)(display_order * frame_width * frame_height * 1.5), SEEK_SET) != 0) {
        /* Do not write at an unknown position; skip this frame instead. */
        printf("Failed to seek in reconstructed YUV file (%s)\n", strerror(errno));
        ret = 1;
    } else {
        /* Plane order after Y: U only (NV12), U then V (IYUV), V then U (YV12). */
        const unsigned char *second = dst_U;
        const unsigned char *third = dst_V;

        if (srcyuv_fourcc == VA_FOURCC_YV12) {
            second = dst_V;
            third = dst_U;
        }

        if (fwrite(dst_Y, 1, y_size, recyuv_fp) != (size_t)y_size ||
            fwrite(second, 1, uv_size, recyuv_fp) != (size_t)uv_size ||
            (third != NULL &&
             fwrite(third, 1, uv_size, recyuv_fp) != (size_t)uv_size)) {
            printf("Failed to write reconstructed frame (%s)\n", strerror(errno));
            ret = 1;
        }
    }

    free(dst_Y);
    free(dst_U);
    free(dst_V);

    fflush(recyuv_fp);

    return ret;
}
2986
2987
save_codeddata(unsigned long long display_order,unsigned long long encode_order)2988 static int save_codeddata(unsigned long long display_order, unsigned long long encode_order)
2989 {
2990 VACodedBufferSegment *buf_list = NULL;
2991 VAStatus va_status;
2992 unsigned int coded_size = 0;
2993
2994 va_status = vaMapBuffer(va_dpy, coded_buf[display_order % SURFACE_NUM], (void **)(&buf_list));
2995 CHECK_VASTATUS(va_status, "vaMapBuffer");
2996 while (buf_list != NULL) {
2997 coded_size += fwrite(buf_list->buf, 1, buf_list->size, coded_fp);
2998 buf_list = (VACodedBufferSegment *) buf_list->next;
2999
3000 frame_size += coded_size;
3001 }
3002 vaUnmapBuffer(va_dpy, coded_buf[display_order % SURFACE_NUM]);
3003
3004 printf("\n "); /* return back to startpoint */
3005 switch (encode_order % 4) {
3006 case 0:
3007 printf("|");
3008 break;
3009 case 1:
3010 printf("/");
3011 break;
3012 case 2:
3013 printf("-");
3014 break;
3015 case 3:
3016 printf("\\");
3017 break;
3018 }
3019 printf("%08lld", encode_order);
3020 printf("(%06d bytes coded)\n", coded_size);
3021
3022 fflush(coded_fp);
3023
3024 return 0;
3025 }
3026
3027
storage_task_dequeue(void)3028 static struct storage_task_t * storage_task_dequeue(void)
3029 {
3030 struct storage_task_t *header;
3031
3032 pthread_mutex_lock(&encode_mutex);
3033
3034 header = storage_task_header;
3035 if (storage_task_header != NULL) {
3036 if (storage_task_tail == storage_task_header)
3037 storage_task_tail = NULL;
3038 storage_task_header = header->next;
3039 }
3040
3041 pthread_mutex_unlock(&encode_mutex);
3042
3043 return header;
3044 }
3045
storage_task_queue(unsigned long long display_order,unsigned long long encode_order)3046 static int storage_task_queue(unsigned long long display_order, unsigned long long encode_order)
3047 {
3048 struct storage_task_t *tmp;
3049
3050 tmp = calloc(1, sizeof(struct storage_task_t));
3051 if (tmp) {
3052 tmp->display_order = display_order;
3053 tmp->encode_order = encode_order;
3054 }
3055
3056 pthread_mutex_lock(&encode_mutex);
3057
3058 if (storage_task_header == NULL) {
3059 storage_task_header = tmp;
3060 storage_task_tail = tmp;
3061 } else {
3062 storage_task_tail->next = tmp;
3063 storage_task_tail = tmp;
3064 }
3065
3066 srcsurface_status[display_order % SURFACE_NUM] = SRC_SURFACE_IN_STORAGE;
3067 pthread_cond_signal(&encode_cond);
3068
3069 pthread_mutex_unlock(&encode_mutex);
3070
3071 return 0;
3072 }
3073
storage_task(unsigned long long display_order,unsigned long long encode_order)3074 static void storage_task(unsigned long long display_order, unsigned long long encode_order)
3075 {
3076 unsigned int tmp;
3077 VAStatus va_status;
3078
3079 tmp = GetTickCount();
3080 va_status = vaSyncSurface(va_dpy, src_surface[display_order % SURFACE_NUM]);
3081 CHECK_VASTATUS(va_status, "vaSyncSurface");
3082 SyncPictureTicks += GetTickCount() - tmp;
3083 tmp = GetTickCount();
3084 save_codeddata(display_order, encode_order);
3085 SavePictureTicks += GetTickCount() - tmp;
3086
3087 save_recyuv(ref_surface[display_order % SURFACE_NUM], display_order, encode_order);
3088
3089 /* reload a new frame data */
3090 tmp = GetTickCount();
3091 if (srcyuv_fp != NULL)
3092 load_surface(src_surface[display_order % SURFACE_NUM], display_order + SURFACE_NUM);
3093 UploadPictureTicks += GetTickCount() - tmp;
3094
3095 pthread_mutex_lock(&encode_mutex);
3096 srcsurface_status[display_order % SURFACE_NUM] = SRC_SURFACE_IN_ENCODING;
3097 pthread_mutex_unlock(&encode_mutex);
3098 }
3099
3100
storage_task_thread(void * t)3101 static void * storage_task_thread(void *t)
3102 {
3103 while (1) {
3104 struct storage_task_t *current;
3105
3106 current = storage_task_dequeue();
3107 if (current == NULL) {
3108 pthread_mutex_lock(&encode_mutex);
3109 pthread_cond_wait(&encode_cond, &encode_mutex);
3110 pthread_mutex_unlock(&encode_mutex);
3111 continue;
3112 }
3113
3114 storage_task(current->display_order, current->encode_order);
3115
3116 free(current);
3117
3118 /* all frames are saved, exit the thread */
3119 if (++frame_coded >= frame_count)
3120 break;
3121 }
3122
3123 return 0;
3124 }
3125
3126
encode_frames(void)3127 static int encode_frames(void)
3128 {
3129 unsigned int i, tmp;
3130 VAStatus va_status;
3131 //VASurfaceStatus surface_status;
3132
3133 /* upload RAW YUV data into all surfaces */
3134 tmp = GetTickCount();
3135 if (srcyuv_fp != NULL) {
3136 for (i = 0; i < SURFACE_NUM; i++)
3137 load_surface(src_surface[i], i);
3138 } else
3139 upload_source_YUV_once_for_all();
3140 UploadPictureTicks += GetTickCount() - tmp;
3141
3142 /* ready for encoding */
3143 memset(srcsurface_status, SRC_SURFACE_IN_ENCODING, sizeof(srcsurface_status));
3144
3145 memset(&seq_param, 0, sizeof(seq_param));
3146 memset(&pic_param, 0, sizeof(pic_param));
3147 memset(&slice_param, 0, sizeof(slice_param));
3148
3149 if (encode_syncmode == 0)
3150 pthread_create(&encode_thread, NULL, storage_task_thread, NULL);
3151
3152 for (current_frame_encoding = 0; current_frame_encoding < frame_count; current_frame_encoding++) {
3153 encoding2display_order(current_frame_encoding, intra_period, intra_idr_period, ip_period,
3154 ¤t_frame_display, ¤t_frame_type);
3155 if (current_frame_type == FRAME_IDR) {
3156 numShortTerm = 0;
3157 current_frame_num = 0;
3158 current_IDR_display = current_frame_display;
3159 }
3160 printf("%s : %lld %s : %lld type : %d\n", "encoding order", current_frame_encoding, "Display order", current_frame_display, current_frame_type);
3161 /* check if the source frame is ready */
3162 while (srcsurface_status[current_slot] != SRC_SURFACE_IN_ENCODING) {
3163 usleep(1);
3164 }
3165
3166 tmp = GetTickCount();
3167 va_status = vaBeginPicture(va_dpy, context_id, src_surface[current_slot]);
3168 CHECK_VASTATUS(va_status, "vaBeginPicture");
3169 BeginPictureTicks += GetTickCount() - tmp;
3170 fill_vps_header(&vps);
3171 fill_sps_header(&sps, 0);
3172 fill_pps_header(&pps, 0, 0);
3173 tmp = GetTickCount();
3174 if (current_frame_type == FRAME_IDR) {
3175 render_sequence(&sps);
3176 render_packedvideo();
3177 render_packedsequence();
3178 }
3179 render_packedpicture();
3180 render_picture(&pps);
3181 fill_slice_header(0, &pps, &ssh);
3182 render_slice();
3183 RenderPictureTicks += GetTickCount() - tmp;
3184
3185 tmp = GetTickCount();
3186 va_status = vaEndPicture(va_dpy, context_id);
3187 CHECK_VASTATUS(va_status, "vaEndPicture");;
3188 EndPictureTicks += GetTickCount() - tmp;
3189
3190 if (encode_syncmode)
3191 storage_task(current_frame_display, current_frame_encoding);
3192 else /* queue the storage task queue */
3193 storage_task_queue(current_frame_display, current_frame_encoding);
3194
3195 update_ReferenceFrames();
3196 }
3197
3198 if (encode_syncmode == 0) {
3199 int ret;
3200 pthread_join(encode_thread, (void **)&ret);
3201 }
3202
3203 return 0;
3204 }
3205
3206
release_encode()3207 static int release_encode()
3208 {
3209 int i;
3210
3211 vaDestroySurfaces(va_dpy, &src_surface[0], SURFACE_NUM);
3212 vaDestroySurfaces(va_dpy, &ref_surface[0], SURFACE_NUM);
3213
3214 for (i = 0; i < SURFACE_NUM; i++)
3215 vaDestroyBuffer(va_dpy, coded_buf[i]);
3216
3217 vaDestroyContext(va_dpy, context_id);
3218 vaDestroyConfig(va_dpy, config_id);
3219
3220 return 0;
3221 }
3222
deinit_va()3223 static int deinit_va()
3224 {
3225 vaTerminate(va_dpy);
3226
3227 va_close_display(va_dpy);
3228
3229 return 0;
3230 }
3231
3232
print_input()3233 static int print_input()
3234 {
3235 printf("\n\nINPUT:Try to encode HEVC...\n");
3236 if (rc_mode != -1)
3237 printf("INPUT: RateControl : %s\n", rc_to_string(rc_mode));
3238 printf("INPUT: Resolution : %dx%d, %d frames\n",
3239 frame_width, frame_height, frame_count);
3240 printf("INPUT: FrameRate : %d\n", frame_rate);
3241 printf("INPUT: Bitrate : %d\n", frame_bitrate);
3242 printf("INPUT: Slieces : %d\n", frame_slices);
3243 printf("INPUT: IntraPeriod : %d\n", intra_period);
3244 printf("INPUT: IDRPeriod : %d\n", intra_idr_period);
3245 printf("INPUT: IpPeriod : %d\n", ip_period);
3246 printf("INPUT: Initial QP : %d\n", initial_qp);
3247 printf("INPUT: Min QP : %d\n", minimal_qp);
3248 printf("INPUT: P As B : %d\n", p2b);
3249 printf("INPUT: lowpower : %d\n", lowpower);
3250 printf("INPUT: Source YUV : %s", srcyuv_fp ? "FILE" : "AUTO generated");
3251 if (srcyuv_fp)
3252 printf(":%s (fourcc %s)\n", srcyuv_fn, fourcc_to_string(srcyuv_fourcc));
3253 else
3254 printf("\n");
3255 printf("INPUT: Coded Clip : %s\n", coded_fn);
3256 if (recyuv_fp == NULL)
3257 printf("INPUT: Rec Clip : %s\n", "Not save reconstructed frame");
3258 else
3259 printf("INPUT: Rec Clip : Save reconstructed frame into %s (fourcc %s)\n", recyuv_fn,
3260 fourcc_to_string(srcyuv_fourcc));
3261
3262 printf("\n\n"); /* return back to startpoint */
3263
3264 return 0;
3265 }
3266
calc_PSNR(double * psnr)3267 static int calc_PSNR(double *psnr)
3268 {
3269 char *srcyuv_ptr = NULL, *recyuv_ptr = NULL, tmp;
3270 unsigned long long min_size;
3271 unsigned long long i, sse = 0;
3272 double ssemean;
3273 int fourM = 0x400000; /* 4M */
3274
3275 min_size = MIN(srcyuv_frames, frame_count) * frame_width * frame_height * 1.5;
3276 for (i = 0; i < min_size; i++) {
3277 unsigned long long j = i % fourM;
3278
3279 if ((i % fourM) == 0) {
3280 if (srcyuv_ptr)
3281 munmap(srcyuv_ptr, fourM);
3282 if (recyuv_ptr)
3283 munmap(recyuv_ptr, fourM);
3284
3285 srcyuv_ptr = mmap(0, fourM, PROT_READ, MAP_SHARED, fileno(srcyuv_fp), i);
3286 recyuv_ptr = mmap(0, fourM, PROT_READ, MAP_SHARED, fileno(recyuv_fp), i);
3287 if ((srcyuv_ptr == MAP_FAILED) || (recyuv_ptr == MAP_FAILED)) {
3288 if (srcyuv_ptr != MAP_FAILED)
3289 munmap(srcyuv_ptr, fourM);
3290 if (recyuv_ptr != MAP_FAILED)
3291 munmap(recyuv_ptr, fourM);
3292 printf("Failed to mmap YUV files\n");
3293 return 1;
3294 }
3295 }
3296 tmp = srcyuv_ptr[j] - recyuv_ptr[j];
3297 sse += tmp * tmp;
3298 }
3299 ssemean = (double)sse / (double)min_size;
3300 *psnr = 20.0 * log10(255) - 10.0 * log10(ssemean);
3301
3302 if (srcyuv_ptr)
3303 munmap(srcyuv_ptr, fourM);
3304 if (recyuv_ptr)
3305 munmap(recyuv_ptr, fourM);
3306
3307 return 0;
3308 }
3309
print_performance(unsigned int PictureCount)3310 static int print_performance(unsigned int PictureCount)
3311 {
3312 unsigned int psnr_ret = 1, others = 0;
3313 double psnr = 0, total_size = frame_width * frame_height * 1.5 * frame_count;
3314
3315 if (calc_psnr && srcyuv_fp && recyuv_fp)
3316 psnr_ret = calc_PSNR(&psnr);
3317
3318 others = TotalTicks - UploadPictureTicks - BeginPictureTicks
3319 - RenderPictureTicks - EndPictureTicks - SyncPictureTicks - SavePictureTicks;
3320
3321 printf("\n\n");
3322
3323 printf("PERFORMANCE: Frame Rate : %.2f fps (%d frames, %d ms (%.2f ms per frame))\n",
3324 (double) 1000 * PictureCount / TotalTicks, PictureCount,
3325 TotalTicks, ((double) TotalTicks) / (double) PictureCount);
3326 printf("PERFORMANCE: Compression ratio : %d:1\n", (unsigned int)(total_size / frame_size));
3327 if (psnr_ret == 0)
3328 printf("PERFORMANCE: PSNR : %.2f (%lld frames calculated)\n",
3329 psnr, MIN(frame_count, srcyuv_frames));
3330
3331 printf("PERFORMANCE: UploadPicture : %d ms (%.2f, %.2f%% percent)\n",
3332 (int) UploadPictureTicks, ((double) UploadPictureTicks) / (double) PictureCount,
3333 UploadPictureTicks / (double) TotalTicks / 0.01);
3334 printf("PERFORMANCE: vaBeginPicture : %d ms (%.2f, %.2f%% percent)\n",
3335 (int) BeginPictureTicks, ((double) BeginPictureTicks) / (double) PictureCount,
3336 BeginPictureTicks / (double) TotalTicks / 0.01);
3337 printf("PERFORMANCE: vaRenderHeader : %d ms (%.2f, %.2f%% percent)\n",
3338 (int) RenderPictureTicks, ((double) RenderPictureTicks) / (double) PictureCount,
3339 RenderPictureTicks / (double) TotalTicks / 0.01);
3340 printf("PERFORMANCE: vaEndPicture : %d ms (%.2f, %.2f%% percent)\n",
3341 (int) EndPictureTicks, ((double) EndPictureTicks) / (double) PictureCount,
3342 EndPictureTicks / (double) TotalTicks / 0.01);
3343 printf("PERFORMANCE: vaSyncSurface : %d ms (%.2f, %.2f%% percent)\n",
3344 (int) SyncPictureTicks, ((double) SyncPictureTicks) / (double) PictureCount,
3345 SyncPictureTicks / (double) TotalTicks / 0.01);
3346 printf("PERFORMANCE: SavePicture : %d ms (%.2f, %.2f%% percent)\n",
3347 (int) SavePictureTicks, ((double) SavePictureTicks) / (double) PictureCount,
3348 SavePictureTicks / (double) TotalTicks / 0.01);
3349 printf("PERFORMANCE: Others : %d ms (%.2f, %.2f%% percent)\n",
3350 (int) others, ((double) others) / (double) PictureCount,
3351 others / (double) TotalTicks / 0.01);
3352
3353 if (encode_syncmode == 0)
3354 printf("(Multithread enabled, the timing is only for reference)\n");
3355
3356 return 0;
3357 }
3358
3359
main(int argc,char ** argv)3360 int main(int argc, char **argv)
3361 {
3362 unsigned int start;
3363
3364 va_init_display_args(&argc, argv);
3365 process_cmdline(argc, argv);
3366
3367 print_input();
3368
3369 start = GetTickCount();
3370
3371 init_va();
3372 setup_encode();
3373
3374 encode_frames();
3375
3376 release_encode();
3377 deinit_va();
3378
3379 TotalTicks += GetTickCount() - start;
3380 print_performance(frame_count);
3381
3382 return 0;
3383 }
3384