1 // Copyright 2014 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 // 5 // This file contains an implementation of an H264 Annex-B video stream parser. 6 7 #ifndef MEDIA_FILTERS_H264_PARSER_H_ 8 #define MEDIA_FILTERS_H264_PARSER_H_ 9 10 #include <sys/types.h> 11 12 #include <map> 13 14 #include "base/basictypes.h" 15 #include "media/base/media_export.h" 16 #include "media/filters/h264_bit_reader.h" 17 18 namespace media { 19 20 // For explanations of each struct and its members, see H.264 specification 21 // at http://www.itu.int/rec/T-REC-H.264. 22 struct MEDIA_EXPORT H264NALU { 23 H264NALU(); 24 25 enum Type { 26 kUnspecified = 0, 27 kNonIDRSlice = 1, 28 kSliceDataA = 2, 29 kSliceDataB = 3, 30 kSliceDataC = 4, 31 kIDRSlice = 5, 32 kSEIMessage = 6, 33 kSPS = 7, 34 kPPS = 8, 35 kAUD = 9, 36 kEOSeq = 10, 37 kEOStream = 11, 38 kFiller = 12, 39 kSPSExt = 13, 40 kReserved14 = 14, 41 kReserved15 = 15, 42 kReserved16 = 16, 43 kReserved17 = 17, 44 kReserved18 = 18, 45 kCodedSliceAux = 19, 46 kCodedSliceExtension = 20, 47 }; 48 49 // After (without) start code; we don't own the underlying memory 50 // and a shallow copy should be made when copying this struct. 51 const uint8* data; 52 off_t size; // From after start code to start code of next NALU (or EOS). 53 54 int nal_ref_idc; 55 int nal_unit_type; 56 }; 57 58 enum { 59 kH264ScalingList4x4Length = 16, 60 kH264ScalingList8x8Length = 64, 61 }; 62 63 struct MEDIA_EXPORT H264SPS { 64 H264SPS(); 65 66 int profile_idc; 67 bool constraint_set0_flag; 68 bool constraint_set1_flag; 69 bool constraint_set2_flag; 70 bool constraint_set3_flag; 71 bool constraint_set4_flag; 72 bool constraint_set5_flag; 73 int level_idc; 74 int seq_parameter_set_id; 75 76 int chroma_format_idc; 77 bool separate_colour_plane_flag; 78 int bit_depth_luma_minus8; 79 int bit_depth_chroma_minus8; 80 bool qpprime_y_zero_transform_bypass_flag; 81 82 bool seq_scaling_matrix_present_flag; 83 int scaling_list4x4[6][kH264ScalingList4x4Length]; 84 int scaling_list8x8[6][kH264ScalingList8x8Length]; 85 86 int log2_max_frame_num_minus4; 87 int pic_order_cnt_type; 88 int log2_max_pic_order_cnt_lsb_minus4; 89 bool delta_pic_order_always_zero_flag; 90 int offset_for_non_ref_pic; 91 int offset_for_top_to_bottom_field; 92 int num_ref_frames_in_pic_order_cnt_cycle; 93 int expected_delta_per_pic_order_cnt_cycle; // calculated 94 int offset_for_ref_frame[255]; 95 int max_num_ref_frames; 96 bool gaps_in_frame_num_value_allowed_flag; 97 int pic_width_in_mbs_minus1; 98 int pic_height_in_map_units_minus1; 99 bool frame_mbs_only_flag; 100 bool mb_adaptive_frame_field_flag; 101 bool direct_8x8_inference_flag; 102 bool frame_cropping_flag; 103 int frame_crop_left_offset; 104 int frame_crop_right_offset; 105 int frame_crop_top_offset; 106 int frame_crop_bottom_offset; 107 108 bool vui_parameters_present_flag; 109 int sar_width; // Set to 0 when not specified. 110 int sar_height; // Set to 0 when not specified. 111 bool bitstream_restriction_flag; 112 int max_num_reorder_frames; 113 int max_dec_frame_buffering; 114 115 int chroma_array_type; 116 }; 117 118 struct MEDIA_EXPORT H264PPS { 119 H264PPS(); 120 121 int pic_parameter_set_id; 122 int seq_parameter_set_id; 123 bool entropy_coding_mode_flag; 124 bool bottom_field_pic_order_in_frame_present_flag; 125 int num_slice_groups_minus1; 126 // TODO(posciak): Slice groups not implemented, could be added at some point. 127 int num_ref_idx_l0_default_active_minus1; 128 int num_ref_idx_l1_default_active_minus1; 129 bool weighted_pred_flag; 130 int weighted_bipred_idc; 131 int pic_init_qp_minus26; 132 int pic_init_qs_minus26; 133 int chroma_qp_index_offset; 134 bool deblocking_filter_control_present_flag; 135 bool constrained_intra_pred_flag; 136 bool redundant_pic_cnt_present_flag; 137 bool transform_8x8_mode_flag; 138 139 bool pic_scaling_matrix_present_flag; 140 int scaling_list4x4[6][kH264ScalingList4x4Length]; 141 int scaling_list8x8[6][kH264ScalingList8x8Length]; 142 143 int second_chroma_qp_index_offset; 144 }; 145 146 struct MEDIA_EXPORT H264ModificationOfPicNum { 147 int modification_of_pic_nums_idc; 148 union { 149 int abs_diff_pic_num_minus1; 150 int long_term_pic_num; 151 }; 152 }; 153 154 struct MEDIA_EXPORT H264WeightingFactors { 155 bool luma_weight_flag; 156 bool chroma_weight_flag; 157 int luma_weight[32]; 158 int luma_offset[32]; 159 int chroma_weight[32][2]; 160 int chroma_offset[32][2]; 161 }; 162 163 struct MEDIA_EXPORT H264DecRefPicMarking { 164 int memory_mgmnt_control_operation; 165 int difference_of_pic_nums_minus1; 166 int long_term_pic_num; 167 int long_term_frame_idx; 168 int max_long_term_frame_idx_plus1; 169 }; 170 171 struct MEDIA_EXPORT H264SliceHeader { 172 H264SliceHeader(); 173 174 enum { 175 kRefListSize = 32, 176 kRefListModSize = kRefListSize 177 }; 178 179 enum Type { 180 kPSlice = 0, 181 kBSlice = 1, 182 kISlice = 2, 183 kSPSlice = 3, 184 kSISlice = 4, 185 }; 186 187 bool IsPSlice() const; 188 bool IsBSlice() const; 189 bool IsISlice() const; 190 bool IsSPSlice() const; 191 bool IsSISlice() const; 192 193 bool idr_pic_flag; // from NAL header 194 int nal_ref_idc; // from NAL header 195 const uint8* nalu_data; // from NAL header 196 off_t nalu_size; // from NAL header 197 off_t header_bit_size; // calculated 198 199 int first_mb_in_slice; 200 int slice_type; 201 int pic_parameter_set_id; 202 int colour_plane_id; // TODO(posciak): use this! http://crbug.com/139878 203 int frame_num; 204 bool field_pic_flag; 205 bool bottom_field_flag; 206 int idr_pic_id; 207 int pic_order_cnt_lsb; 208 int delta_pic_order_cnt_bottom; 209 int delta_pic_order_cnt[2]; 210 int redundant_pic_cnt; 211 bool direct_spatial_mv_pred_flag; 212 213 bool num_ref_idx_active_override_flag; 214 int num_ref_idx_l0_active_minus1; 215 int num_ref_idx_l1_active_minus1; 216 bool ref_pic_list_modification_flag_l0; 217 bool ref_pic_list_modification_flag_l1; 218 H264ModificationOfPicNum ref_list_l0_modifications[kRefListModSize]; 219 H264ModificationOfPicNum ref_list_l1_modifications[kRefListModSize]; 220 221 int luma_log2_weight_denom; 222 int chroma_log2_weight_denom; 223 224 bool luma_weight_l0_flag; 225 bool chroma_weight_l0_flag; 226 H264WeightingFactors pred_weight_table_l0; 227 228 bool luma_weight_l1_flag; 229 bool chroma_weight_l1_flag; 230 H264WeightingFactors pred_weight_table_l1; 231 232 bool no_output_of_prior_pics_flag; 233 bool long_term_reference_flag; 234 235 bool adaptive_ref_pic_marking_mode_flag; 236 H264DecRefPicMarking ref_pic_marking[kRefListSize]; 237 238 int cabac_init_idc; 239 int slice_qp_delta; 240 bool sp_for_switch_flag; 241 int slice_qs_delta; 242 int disable_deblocking_filter_idc; 243 int slice_alpha_c0_offset_div2; 244 int slice_beta_offset_div2; 245 }; 246 247 struct H264SEIRecoveryPoint { 248 int recovery_frame_cnt; 249 bool exact_match_flag; 250 bool broken_link_flag; 251 int changing_slice_group_idc; 252 }; 253 254 struct MEDIA_EXPORT H264SEIMessage { 255 H264SEIMessage(); 256 257 enum Type { 258 kSEIRecoveryPoint = 6, 259 }; 260 261 int type; 262 int payload_size; 263 union { 264 // Placeholder; in future more supported types will contribute to more 265 // union members here. 266 H264SEIRecoveryPoint recovery_point; 267 }; 268 }; 269 270 // Class to parse an Annex-B H.264 stream, 271 // as specified in chapters 7 and Annex B of the H.264 spec. 272 class MEDIA_EXPORT H264Parser { 273 public: 274 enum Result { 275 kOk, 276 kInvalidStream, // error in stream 277 kUnsupportedStream, // stream not supported by the parser 278 kEOStream, // end of stream 279 }; 280 281 // Find offset from start of data to next NALU start code 282 // and size of found start code (3 or 4 bytes). 283 // If no start code is found, offset is pointing to the first unprocessed byte 284 // (i.e. the first byte that was not considered as a possible start of a start 285 // code) and |*start_code_size| is set to 0. 286 // Preconditions: 287 // - |data_size| >= 0 288 // Postconditions: 289 // - |*offset| is between 0 and |data_size| included. 290 // It is strictly less than |data_size| if |data_size| > 0. 291 // - |*start_code_size| is either 0, 3 or 4. 292 static bool FindStartCode(const uint8* data, off_t data_size, 293 off_t* offset, off_t* start_code_size); 294 295 H264Parser(); 296 ~H264Parser(); 297 298 void Reset(); 299 // Set current stream pointer to |stream| of |stream_size| in bytes, 300 // |stream| owned by caller. 301 void SetStream(const uint8* stream, off_t stream_size); 302 303 // Read the stream to find the next NALU, identify it and return 304 // that information in |*nalu|. This advances the stream to the beginning 305 // of this NALU, but not past it, so subsequent calls to NALU-specific 306 // parsing functions (ParseSPS, etc.) will parse this NALU. 307 // If the caller wishes to skip the current NALU, it can call this function 308 // again, instead of any NALU-type specific parse functions below. 309 Result AdvanceToNextNALU(H264NALU* nalu); 310 311 // NALU-specific parsing functions. 312 // These should be called after AdvanceToNextNALU(). 313 314 // SPSes and PPSes are owned by the parser class and the memory for their 315 // structures is managed here, not by the caller, as they are reused 316 // across NALUs. 317 // 318 // Parse an SPS/PPS NALU and save their data in the parser, returning id 319 // of the parsed structure in |*pps_id|/|*sps_id|. 320 // To get a pointer to a given SPS/PPS structure, use GetSPS()/GetPPS(), 321 // passing the returned |*sps_id|/|*pps_id| as parameter. 322 // TODO(posciak,fischman): consider replacing returning Result from Parse*() 323 // methods with a scoped_ptr and adding an AtEOS() function to check for EOS 324 // if Parse*() return NULL. 325 Result ParseSPS(int* sps_id); 326 Result ParsePPS(int* pps_id); 327 328 // Return a pointer to SPS/PPS with given |sps_id|/|pps_id| or NULL if not 329 // present. 330 const H264SPS* GetSPS(int sps_id); 331 const H264PPS* GetPPS(int pps_id); 332 333 // Slice headers and SEI messages are not used across NALUs by the parser 334 // and can be discarded after current NALU, so the parser does not store 335 // them, nor does it manage their memory. 336 // The caller has to provide and manage it instead. 337 338 // Parse a slice header, returning it in |*shdr|. |*nalu| must be set to 339 // the NALU returned from AdvanceToNextNALU() and corresponding to |*shdr|. 340 Result ParseSliceHeader(const H264NALU& nalu, H264SliceHeader* shdr); 341 342 // Parse a SEI message, returning it in |*sei_msg|, provided and managed 343 // by the caller. 344 Result ParseSEI(H264SEIMessage* sei_msg); 345 346 private: 347 // Move the stream pointer to the beginning of the next NALU, 348 // i.e. pointing at the next start code. 349 // Return true if a NALU has been found. 350 // If a NALU is found: 351 // - its size in bytes is returned in |*nalu_size| and includes 352 // the start code as well as the trailing zero bits. 353 // - the size in bytes of the start code is returned in |*start_code_size|. 354 bool LocateNALU(off_t* nalu_size, off_t* start_code_size); 355 356 // Exp-Golomb code parsing as specified in chapter 9.1 of the spec. 357 // Read one unsigned exp-Golomb code from the stream and return in |*val|. 358 Result ReadUE(int* val); 359 360 // Read one signed exp-Golomb code from the stream and return in |*val|. 361 Result ReadSE(int* val); 362 363 // Parse scaling lists (see spec). 364 Result ParseScalingList(int size, int* scaling_list, bool* use_default); 365 Result ParseSPSScalingLists(H264SPS* sps); 366 Result ParsePPSScalingLists(const H264SPS& sps, H264PPS* pps); 367 368 // Parse optional VUI parameters in SPS (see spec). 369 Result ParseVUIParameters(H264SPS* sps); 370 // Set |hrd_parameters_present| to true only if they are present. 371 Result ParseAndIgnoreHRDParameters(bool* hrd_parameters_present); 372 373 // Parse reference picture lists' modifications (see spec). 374 Result ParseRefPicListModifications(H264SliceHeader* shdr); 375 Result ParseRefPicListModification(int num_ref_idx_active_minus1, 376 H264ModificationOfPicNum* ref_list_mods); 377 378 // Parse prediction weight table (see spec). 379 Result ParsePredWeightTable(const H264SPS& sps, H264SliceHeader* shdr); 380 381 // Parse weighting factors (see spec). 382 Result ParseWeightingFactors(int num_ref_idx_active_minus1, 383 int chroma_array_type, 384 int luma_log2_weight_denom, 385 int chroma_log2_weight_denom, 386 H264WeightingFactors* w_facts); 387 388 // Parse decoded reference picture marking information (see spec). 389 Result ParseDecRefPicMarking(H264SliceHeader* shdr); 390 391 // Pointer to the current NALU in the stream. 392 const uint8* stream_; 393 394 // Bytes left in the stream after the current NALU. 395 off_t bytes_left_; 396 397 H264BitReader br_; 398 399 // PPSes and SPSes stored for future reference. 400 typedef std::map<int, H264SPS*> SPSById; 401 typedef std::map<int, H264PPS*> PPSById; 402 SPSById active_SPSes_; 403 PPSById active_PPSes_; 404 405 DISALLOW_COPY_AND_ASSIGN(H264Parser); 406 }; 407 408 } // namespace media 409 410 #endif // MEDIA_FILTERS_H264_PARSER_H_ 411