// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// This file contains an implementation of an H264 Annex-B video stream parser.
// Note: ported from Chromium commit head: 600904374759
// Note: GetColorSpace() is not ported.

#ifndef H264_PARSER_H_
#define H264_PARSER_H_

#include <stddef.h>
#include <stdint.h>
#include <sys/types.h>

#include <map>
#include <memory>
#include <vector>

#include "base/macros.h"
#include "base/optional.h"
#include "h264_bit_reader.h"
#include "ranges.h"
#include "rect.h"
#include "size.h"
#include "subsample_entry.h"
#include "video_codecs.h"

namespace media {

struct SubsampleEntry;

// For explanations of each struct and its members, see H.264 specification
// at http://www.itu.int/rec/T-REC-H.264.

// Describes a single NAL unit found in the stream. This is the information
// H264Parser::AdvanceToNextNALU() returns to its caller.
struct H264NALU {
  H264NALU();

  // NAL unit type codes (see nal_unit_type in the H.264 specification).
  enum Type {
    kUnspecified = 0,
    kNonIDRSlice = 1,
    kSliceDataA = 2,
    kSliceDataB = 3,
    kSliceDataC = 4,
    kIDRSlice = 5,
    kSEIMessage = 6,
    kSPS = 7,
    kPPS = 8,
    kAUD = 9,
    kEOSeq = 10,
    kEOStream = 11,
    kFiller = 12,
    kSPSExt = 13,
    kReserved14 = 14,
    kReserved15 = 15,
    kReserved16 = 16,
    kReserved17 = 17,
    kReserved18 = 18,
    kCodedSliceAux = 19,
    kCodedSliceExtension = 20,
  };

  // After (without) start code; we don't own the underlying memory
  // and a shallow copy should be made when copying this struct.
  const uint8_t* data;
  off_t size;  // From after start code to start code of next NALU (or EOS).

  int nal_ref_idc;
  int nal_unit_type;
};

// Number of entries in a 4x4 and in an 8x8 scaling list, respectively. Used
// as the inner dimension of the scaling list arrays in H264SPS and H264PPS.
enum {
  kH264ScalingList4x4Length = 16,
  kH264ScalingList8x8Length = 64,
};

// Parsed contents of an SPS NALU. Instances are stored in and owned by
// H264Parser (see H264Parser::ParseSPS() and H264Parser::GetSPS()).
struct H264SPS {
  H264SPS();

  // Known profile_idc values.
  enum H264ProfileIDC {
    kProfileIDCBaseline = 66,
    kProfileIDCConstrainedBaseline = kProfileIDCBaseline,
    kProfileIDCMain = 77,
    kProfileIDScalableBaseline = 83,
    kProfileIDScalableHigh = 86,
    kProfileIDCHigh = 100,
    kProfileIDHigh10 = 110,
    kProfileIDSMultiviewHigh = 118,
    kProfileIDHigh422 = 122,
    kProfileIDStereoHigh = 128,
    kProfileIDHigh444Predictive = 244,
  };

  // Level identifiers; this is the value set GetIndicatedLevel() draws from.
  // Note that kLevelIDC1B is assigned 9, so the enumerators are not listed in
  // strictly ascending numeric order.
  enum H264LevelIDC : uint8_t {
    kLevelIDC1p0 = 10,
    kLevelIDC1B = 9,
    kLevelIDC1p1 = 11,
    kLevelIDC1p2 = 12,
    kLevelIDC1p3 = 13,
    kLevelIDC2p0 = 20,
    kLevelIDC2p1 = 21,
    kLevelIDC2p2 = 22,
    kLevelIDC3p0 = 30,
    kLevelIDC3p1 = 31,
    kLevelIDC3p2 = 32,
    kLevelIDC4p0 = 40,
    kLevelIDC4p1 = 41,
    kLevelIDC4p2 = 42,
    kLevelIDC5p0 = 50,
    kLevelIDC5p1 = 51,
    kLevelIDC5p2 = 52,
    kLevelIDC6p0 = 60,
    kLevelIDC6p1 = 61,
    kLevelIDC6p2 = 62,
  };

  // NOTE(review): presumably the aspect_ratio_idc value signalling that an
  // explicit sample aspect ratio follows in the VUI -- confirm against the
  // VUI parsing code.
  enum AspectRatioIdc {
    kExtendedSar = 255,
  };

  enum {
    // Constants for HRD parameters (spec ch. E.2.2).
    kBitRateScaleConstantTerm = 6,  // Equation E-37.
    kCPBSizeScaleConstantTerm = 4,  // Equation E-38.
    kDefaultInitialCPBRemovalDelayLength = 24,
    kDefaultDPBOutputDelayLength = 24,
    kDefaultTimeOffsetLength = 24,
  };

  int profile_idc;
  bool constraint_set0_flag;
  bool constraint_set1_flag;
  bool constraint_set2_flag;
  bool constraint_set3_flag;
  bool constraint_set4_flag;
  bool constraint_set5_flag;
  int level_idc;
  int seq_parameter_set_id;

  int chroma_format_idc;
  bool separate_colour_plane_flag;
  int bit_depth_luma_minus8;
  int bit_depth_chroma_minus8;
  bool qpprime_y_zero_transform_bypass_flag;

  bool seq_scaling_matrix_present_flag;
  int scaling_list4x4[6][kH264ScalingList4x4Length];
  int scaling_list8x8[6][kH264ScalingList8x8Length];

  int log2_max_frame_num_minus4;
  int pic_order_cnt_type;
  int log2_max_pic_order_cnt_lsb_minus4;
  bool delta_pic_order_always_zero_flag;
  int offset_for_non_ref_pic;
  int offset_for_top_to_bottom_field;
  int num_ref_frames_in_pic_order_cnt_cycle;
  int expected_delta_per_pic_order_cnt_cycle;  // calculated
  int offset_for_ref_frame[255];
  int max_num_ref_frames;
  bool gaps_in_frame_num_value_allowed_flag;
  int pic_width_in_mbs_minus1;
  int pic_height_in_map_units_minus1;
  bool frame_mbs_only_flag;
  bool mb_adaptive_frame_field_flag;
  bool direct_8x8_inference_flag;
  bool frame_cropping_flag;
  int frame_crop_left_offset;
  int frame_crop_right_offset;
  int frame_crop_top_offset;
  int frame_crop_bottom_offset;

  bool vui_parameters_present_flag;
  int sar_width;   // Set to 0 when not specified.
  int sar_height;  // Set to 0 when not specified.
  bool bitstream_restriction_flag;
  int max_num_reorder_frames;
  int max_dec_frame_buffering;
  bool timing_info_present_flag;
  int num_units_in_tick;
  int time_scale;
  bool fixed_frame_rate_flag;

  bool video_signal_type_present_flag;
  int video_format;
  bool video_full_range_flag;
  bool colour_description_present_flag;
  int colour_primaries;
  int transfer_characteristics;
  int matrix_coefficients;

  // TODO(posciak): actually parse these instead of ParseAndIgnoreHRDParameters.
  bool nal_hrd_parameters_present_flag;
  int cpb_cnt_minus1;
  int bit_rate_scale;
  int cpb_size_scale;
  int bit_rate_value_minus1[32];
  int cpb_size_value_minus1[32];
  bool cbr_flag[32];
  int initial_cpb_removal_delay_length_minus_1;
  int cpb_removal_delay_length_minus1;
  int dpb_output_delay_length_minus1;
  int time_offset_length;

  bool low_delay_hrd_flag;

  int chroma_array_type;

  // Get corresponding SPS |level_idc| and |constraint_set3_flag| value from
  // requested |profile| and |level| (see Spec A.3.1).
  static void GetLevelConfigFromProfileLevel(VideoCodecProfile profile,
                                             uint8_t level,
                                             int* level_idc,
                                             bool* constraint_set3_flag);

  // Helpers to compute frequently-used values. These methods return
  // base::nullopt if they encounter integer overflow. They do not verify that
  // the results are in-spec for the given profile or level.
  base::Optional<Size> GetCodedSize() const;
  base::Optional<Rect> GetVisibleRect() const;

  // Helper to compute indicated level from parsed SPS data. The value of
  // indicated level would be included in H264LevelIDC enum representing the
  // level as in name.
  uint8_t GetIndicatedLevel() const;
  // Helper to check if indicated level is lower than or equal to
  // |target_level|.
  bool CheckIndicatedLevelWithinTarget(uint8_t target_level) const;
};

// Parsed contents of a PPS NALU. Instances are stored in and owned by
// H264Parser (see H264Parser::ParsePPS() and H264Parser::GetPPS()).
struct H264PPS {
  H264PPS();

  int pic_parameter_set_id;
  int seq_parameter_set_id;
  bool entropy_coding_mode_flag;
  bool bottom_field_pic_order_in_frame_present_flag;
  int num_slice_groups_minus1;
  // TODO(posciak): Slice groups not implemented, could be added at some point.
  int num_ref_idx_l0_default_active_minus1;
  int num_ref_idx_l1_default_active_minus1;
  bool weighted_pred_flag;
  int weighted_bipred_idc;
  int pic_init_qp_minus26;
  int pic_init_qs_minus26;
  int chroma_qp_index_offset;
  bool deblocking_filter_control_present_flag;
  bool constrained_intra_pred_flag;
  bool redundant_pic_cnt_present_flag;
  bool transform_8x8_mode_flag;

  bool pic_scaling_matrix_present_flag;
  int scaling_list4x4[6][kH264ScalingList4x4Length];
  int scaling_list8x8[6][kH264ScalingList8x8Length];

  int second_chroma_qp_index_offset;
};

// One reference picture list modification entry. H264SliceHeader holds arrays
// of these in |ref_list_l0_modifications| and |ref_list_l1_modifications|.
struct H264ModificationOfPicNum {
  int modification_of_pic_nums_idc;
  // The two members share storage.
  // NOTE(review): which one is meaningful presumably depends on
  // |modification_of_pic_nums_idc| -- confirm against the parsing code.
  union {
    int abs_diff_pic_num_minus1;
    int long_term_pic_num;
  };
};

// Prediction weight table for one reference picture list. H264SliceHeader
// holds one instance per list (|pred_weight_table_l0| and
// |pred_weight_table_l1|).
struct H264WeightingFactors {
  bool luma_weight_flag;
  bool chroma_weight_flag;
  int luma_weight[32];
  int luma_offset[32];
  int chroma_weight[32][2];
  int chroma_offset[32][2];
};

// One decoded reference picture marking entry. H264SliceHeader holds an array
// of these in |ref_pic_marking|.
struct H264DecRefPicMarking {
  int memory_mgmnt_control_operation;
  int difference_of_pic_nums_minus1;
  int long_term_pic_num;
  int long_term_frame_idx;
  int max_long_term_frame_idx_plus1;
};

// Parsed slice header. Provided and managed by the caller and filled in by
// H264Parser::ParseSliceHeader().
struct H264SliceHeader {
  H264SliceHeader();

  // Capacities of the fixed-size per-slice arrays declared below.
  enum { kRefListSize = 32, kRefListModSize = kRefListSize };

  // Slice types.
  enum Type {
    kPSlice = 0,
    kBSlice = 1,
    kISlice = 2,
    kSPSlice = 3,
    kSISlice = 4,
  };

  // Slice type predicates.
  // NOTE(review): presumably derived from |slice_type|; the exact mapping is
  // defined in the implementation file -- confirm there.
  bool IsPSlice() const;
  bool IsBSlice() const;
  bool IsISlice() const;
  bool IsSPSlice() const;
  bool IsSISlice() const;

  bool idr_pic_flag;         // from NAL header
  int nal_ref_idc;           // from NAL header
  const uint8_t* nalu_data;  // from NAL header
  off_t nalu_size;           // from NAL header
  off_t header_bit_size;     // calculated

  int first_mb_in_slice;
  int slice_type;
  int pic_parameter_set_id;
  int colour_plane_id;  // TODO(posciak): use this!  http://crbug.com/139878
  int frame_num;
  bool field_pic_flag;
  bool bottom_field_flag;
  int idr_pic_id;
  int pic_order_cnt_lsb;
  int delta_pic_order_cnt_bottom;
  int delta_pic_order_cnt0;
  int delta_pic_order_cnt1;
  int redundant_pic_cnt;
  bool direct_spatial_mv_pred_flag;

  bool num_ref_idx_active_override_flag;
  int num_ref_idx_l0_active_minus1;
  int num_ref_idx_l1_active_minus1;
  bool ref_pic_list_modification_flag_l0;
  bool ref_pic_list_modification_flag_l1;
  H264ModificationOfPicNum ref_list_l0_modifications[kRefListModSize];
  H264ModificationOfPicNum ref_list_l1_modifications[kRefListModSize];

  int luma_log2_weight_denom;
  int chroma_log2_weight_denom;

  bool luma_weight_l0_flag;
  bool chroma_weight_l0_flag;
  H264WeightingFactors pred_weight_table_l0;

  bool luma_weight_l1_flag;
  bool chroma_weight_l1_flag;
  H264WeightingFactors pred_weight_table_l1;

  bool no_output_of_prior_pics_flag;
  bool long_term_reference_flag;

  bool adaptive_ref_pic_marking_mode_flag;
  H264DecRefPicMarking ref_pic_marking[kRefListSize];

  int cabac_init_idc;
  int slice_qp_delta;
  bool sp_for_switch_flag;
  int slice_qs_delta;
  int disable_deblocking_filter_idc;
  int slice_alpha_c0_offset_div2;
  int slice_beta_offset_div2;

  // Calculated.
  // Size in bits of dec_ref_pic_marking() syntax element.
  size_t dec_ref_pic_marking_bit_size;
  // NOTE(review): presumably the size in bits of the picture order count
  // syntax elements of this header -- confirm against the parsing code.
  size_t pic_order_cnt_bit_size;
};

// Payload of a recovery point SEI message (H264SEIMessage::kSEIRecoveryPoint).
struct H264SEIRecoveryPoint {
  int recovery_frame_cnt;
  bool exact_match_flag;
  bool broken_link_flag;
  int changing_slice_group_idc;
};

// Parsed SEI message. Provided and managed by the caller and filled in by
// H264Parser::ParseSEI().
struct H264SEIMessage {
  H264SEIMessage();

  // SEI payload types with a dedicated member in the union below.
  enum Type {
    kSEIRecoveryPoint = 6,
  };

  int type;
  int payload_size;
  union {
    // Placeholder; in future more supported types will contribute to more
    // union members here.
    H264SEIRecoveryPoint recovery_point;
  };
};

// Class to parse an Annex-B H.264 stream,
// as specified in chapter 7 and Annex B of the H.264 spec.
class H264Parser {
 public:
  enum Result {
    kOk,
    kInvalidStream,      // error in stream
    kUnsupportedStream,  // stream not supported by the parser
    kEOStream,           // end of stream
  };

  // Find offset from start of data to next NALU start code
  // and size of found start code (3 or 4 bytes).
  // If no start code is found, offset is pointing to the first unprocessed byte
  // (i.e. the first byte that was not considered as a possible start of a start
  // code) and |*start_code_size| is set to 0.
  // NOTE(review): the bool result presumably reports whether a start code was
  // found, as documented for FindStartCodeInClearRanges() -- confirm.
  // Preconditions:
  // - |data_size| >= 0
  // Postconditions:
  // - |*offset| is between 0 and |data_size| included.
  //   It is strictly less than |data_size| if |data_size| > 0.
  // - |*start_code_size| is either 0, 3 or 4.
  static bool FindStartCode(const uint8_t* data,
                            off_t data_size,
                            off_t* offset,
                            off_t* start_code_size);

  // Wrapper for FindStartCode() that skips over start codes that
  // may appear inside of the encrypted |ranges|.
  // Returns true if a start code was found. Otherwise returns false.
  static bool FindStartCodeInClearRanges(const uint8_t* data,
                                         off_t data_size,
                                         const Ranges<const uint8_t*>& ranges,
                                         off_t* offset,
                                         off_t* start_code_size);

  static VideoCodecProfile ProfileIDCToVideoCodecProfile(int profile_idc);

  // Parses the input stream and returns all the NALUs through |nalus|. Returns
  // false if the stream is invalid.
  static bool ParseNALUs(const uint8_t* stream,
                         size_t stream_size,
                         std::vector<H264NALU>* nalus);

  H264Parser();
  ~H264Parser();

  void Reset();
  // Set current stream pointer to |stream| of |stream_size| in bytes,
  // |stream| owned by caller.
  // For SetEncryptedStream() only: |subsamples| contains information about
  // what parts of |stream| are encrypted.
  void SetStream(const uint8_t* stream, off_t stream_size);
  void SetEncryptedStream(const uint8_t* stream,
                          off_t stream_size,
                          const std::vector<SubsampleEntry>& subsamples);

  // Read the stream to find the next NALU, identify it and return
  // that information in |*nalu|. This advances the stream to the beginning
  // of this NALU, but not past it, so subsequent calls to NALU-specific
  // parsing functions (ParseSPS, etc.)  will parse this NALU.
  // If the caller wishes to skip the current NALU, it can call this function
  // again, instead of any NALU-type specific parse functions below.
  Result AdvanceToNextNALU(H264NALU* nalu);

  // NALU-specific parsing functions.
  // These should be called after AdvanceToNextNALU().

  // SPSes and PPSes are owned by the parser class and the memory for their
  // structures is managed here, not by the caller, as they are reused
  // across NALUs.
  //
  // Parse an SPS/PPS NALU and save their data in the parser, returning id
  // of the parsed structure in |*sps_id|/|*pps_id|.
  // To get a pointer to a given SPS/PPS structure, use GetSPS()/GetPPS(),
  // passing the returned |*sps_id|/|*pps_id| as parameter.
  // TODO(posciak,fischman): consider replacing returning Result from Parse*()
  // methods with a scoped_ptr and adding an AtEOS() function to check for EOS
  // if Parse*() return NULL.
  Result ParseSPS(int* sps_id);
  Result ParsePPS(int* pps_id);

  // Parses the SPS ID from the SPSExt, but otherwise does nothing.
  Result ParseSPSExt(int* sps_id);

  // Return a pointer to SPS/PPS with given |sps_id|/|pps_id| or NULL if not
  // present.
  const H264SPS* GetSPS(int sps_id) const;
  const H264PPS* GetPPS(int pps_id) const;

  // Slice headers and SEI messages are not used across NALUs by the parser
  // and can be discarded after current NALU, so the parser does not store
  // them, nor does it manage their memory.
  // The caller has to provide and manage it instead.

  // Parse a slice header, returning it in |*shdr|. |nalu| must be the NALU
  // returned from AdvanceToNextNALU() and corresponding to |*shdr|.
  Result ParseSliceHeader(const H264NALU& nalu, H264SliceHeader* shdr);

  // Parse a SEI message, returning it in |*sei_msg|, provided and managed
  // by the caller.
  Result ParseSEI(H264SEIMessage* sei_msg);

  // The return value of this method changes for every successful call to
  // AdvanceToNextNALU().
  // This returns the subsample information for the last NALU that was output
  // from AdvanceToNextNALU().
  std::vector<SubsampleEntry> GetCurrentSubsamples();

 private:
  // Move the stream pointer to the beginning of the next NALU,
  // i.e. pointing at the next start code.
  // Return true if a NALU has been found.
  // If a NALU is found:
  // - its size in bytes is returned in |*nalu_size| and includes
  //   the start code as well as the trailing zero bits.
  // - the size in bytes of the start code is returned in |*start_code_size|.
  bool LocateNALU(off_t* nalu_size, off_t* start_code_size);

  // Exp-Golomb code parsing as specified in chapter 9.1 of the spec.
  // Read one unsigned exp-Golomb code from the stream and return in |*val|.
  Result ReadUE(int* val);

  // Read one signed exp-Golomb code from the stream and return in |*val|.
  Result ReadSE(int* val);

  // Parse scaling lists (see spec).
  Result ParseScalingList(int size, int* scaling_list, bool* use_default);
  Result ParseSPSScalingLists(H264SPS* sps);
  Result ParsePPSScalingLists(const H264SPS& sps, H264PPS* pps);

  // Parse optional VUI parameters in SPS (see spec).
  Result ParseVUIParameters(H264SPS* sps);
  // Set |hrd_parameters_present| to true only if they are present.
  Result ParseAndIgnoreHRDParameters(bool* hrd_parameters_present);

  // Parse reference picture lists' modifications (see spec).
  Result ParseRefPicListModifications(H264SliceHeader* shdr);
  Result ParseRefPicListModification(int num_ref_idx_active_minus1,
                                     H264ModificationOfPicNum* ref_list_mods);

  // Parse prediction weight table (see spec).
  Result ParsePredWeightTable(const H264SPS& sps, H264SliceHeader* shdr);

  // Parse weighting factors (see spec).
  Result ParseWeightingFactors(int num_ref_idx_active_minus1,
                               int chroma_array_type,
                               int luma_log2_weight_denom,
                               int chroma_log2_weight_denom,
                               H264WeightingFactors* w_facts);

  // Parse decoded reference picture marking information (see spec).
  Result ParseDecRefPicMarking(H264SliceHeader* shdr);

  // Pointer to the current NALU in the stream.
  const uint8_t* stream_;

  // Bytes left in the stream after the current NALU.
  off_t bytes_left_;

  H264BitReader br_;

  // PPSes and SPSes stored for future reference.
  std::map<int, std::unique_ptr<H264SPS>> active_SPSes_;
  std::map<int, std::unique_ptr<H264PPS>> active_PPSes_;

  // Ranges of encrypted bytes in the buffer passed to
  // SetEncryptedStream().
  Ranges<const uint8_t*> encrypted_ranges_;

  // This contains the range of the previous NALU found in
  // AdvanceToNextNALU(). Holds exactly one range.
  Ranges<const uint8_t*> previous_nalu_range_;

  DISALLOW_COPY_AND_ASSIGN(H264Parser);
};

}  // namespace media

#endif  // H264_PARSER_H_