1 /*
2 * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "common_video/h264/sps_parser.h"
12
13 #include <cstdint>
14 #include <vector>
15
16 #include "common_video/h264/h264_common.h"
17 #include "rtc_base/bit_buffer.h"
18
19 namespace {
20 typedef absl::optional<webrtc::SpsParser::SpsState> OptionalSps;
21
22 #define RETURN_EMPTY_ON_FAIL(x) \
23 if (!(x)) { \
24 return OptionalSps(); \
25 }
26
27 constexpr int kScalingDeltaMin = -128;
28 constexpr int kScaldingDeltaMax = 127;
29 } // namespace
30
31 namespace webrtc {
32
33 SpsParser::SpsState::SpsState() = default;
34 SpsParser::SpsState::SpsState(const SpsState&) = default;
35 SpsParser::SpsState::~SpsState() = default;
36
// General note: this parser is based on the 02/2014 edition of the H.264
// standard (ITU-T Rec. H.264), which is available from this page:
// http://www.itu.int/rec/T-REC-H.264
40
41 // Unpack RBSP and parse SPS state from the supplied buffer.
ParseSps(const uint8_t * data,size_t length)42 absl::optional<SpsParser::SpsState> SpsParser::ParseSps(const uint8_t* data,
43 size_t length) {
44 std::vector<uint8_t> unpacked_buffer = H264::ParseRbsp(data, length);
45 rtc::BitBuffer bit_buffer(unpacked_buffer.data(), unpacked_buffer.size());
46 return ParseSpsUpToVui(&bit_buffer);
47 }
48
ParseSpsUpToVui(rtc::BitBuffer * buffer)49 absl::optional<SpsParser::SpsState> SpsParser::ParseSpsUpToVui(
50 rtc::BitBuffer* buffer) {
51 // Now, we need to use a bit buffer to parse through the actual AVC SPS
52 // format. See Section 7.3.2.1.1 ("Sequence parameter set data syntax") of the
53 // H.264 standard for a complete description.
54 // Since we only care about resolution, we ignore the majority of fields, but
55 // we still have to actively parse through a lot of the data, since many of
56 // the fields have variable size.
57 // We're particularly interested in:
58 // chroma_format_idc -> affects crop units
59 // pic_{width,height}_* -> resolution of the frame in macroblocks (16x16).
60 // frame_crop_*_offset -> crop information
61
62 SpsState sps;
63
64 // The golomb values we have to read, not just consume.
65 uint32_t golomb_ignored;
66
67 // chroma_format_idc will be ChromaArrayType if separate_colour_plane_flag is
68 // 0. It defaults to 1, when not specified.
69 uint32_t chroma_format_idc = 1;
70
71 // profile_idc: u(8). We need it to determine if we need to read/skip chroma
72 // formats.
73 uint8_t profile_idc;
74 RETURN_EMPTY_ON_FAIL(buffer->ReadUInt8(&profile_idc));
75 // constraint_set0_flag through constraint_set5_flag + reserved_zero_2bits
76 // 1 bit each for the flags + 2 bits = 8 bits = 1 byte.
77 RETURN_EMPTY_ON_FAIL(buffer->ConsumeBytes(1));
78 // level_idc: u(8)
79 RETURN_EMPTY_ON_FAIL(buffer->ConsumeBytes(1));
80 // seq_parameter_set_id: ue(v)
81 RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&sps.id));
82 sps.separate_colour_plane_flag = 0;
83 // See if profile_idc has chroma format information.
84 if (profile_idc == 100 || profile_idc == 110 || profile_idc == 122 ||
85 profile_idc == 244 || profile_idc == 44 || profile_idc == 83 ||
86 profile_idc == 86 || profile_idc == 118 || profile_idc == 128 ||
87 profile_idc == 138 || profile_idc == 139 || profile_idc == 134) {
88 // chroma_format_idc: ue(v)
89 RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&chroma_format_idc));
90 if (chroma_format_idc == 3) {
91 // separate_colour_plane_flag: u(1)
92 RETURN_EMPTY_ON_FAIL(
93 buffer->ReadBits(&sps.separate_colour_plane_flag, 1));
94 }
95 // bit_depth_luma_minus8: ue(v)
96 RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
97 // bit_depth_chroma_minus8: ue(v)
98 RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
99 // qpprime_y_zero_transform_bypass_flag: u(1)
100 RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1));
101 // seq_scaling_matrix_present_flag: u(1)
102 uint32_t seq_scaling_matrix_present_flag;
103 RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&seq_scaling_matrix_present_flag, 1));
104 if (seq_scaling_matrix_present_flag) {
105 // Process the scaling lists just enough to be able to properly
106 // skip over them, so we can still read the resolution on streams
107 // where this is included.
108 int scaling_list_count = (chroma_format_idc == 3 ? 12 : 8);
109 for (int i = 0; i < scaling_list_count; ++i) {
110 // seq_scaling_list_present_flag[i] : u(1)
111 uint32_t seq_scaling_list_present_flags;
112 RETURN_EMPTY_ON_FAIL(
113 buffer->ReadBits(&seq_scaling_list_present_flags, 1));
114 if (seq_scaling_list_present_flags != 0) {
115 int last_scale = 8;
116 int next_scale = 8;
117 int size_of_scaling_list = i < 6 ? 16 : 64;
118 for (int j = 0; j < size_of_scaling_list; j++) {
119 if (next_scale != 0) {
120 int32_t delta_scale;
121 // delta_scale: se(v)
122 RETURN_EMPTY_ON_FAIL(
123 buffer->ReadSignedExponentialGolomb(&delta_scale));
124 RETURN_EMPTY_ON_FAIL(delta_scale >= kScalingDeltaMin &&
125 delta_scale <= kScaldingDeltaMax);
126 next_scale = (last_scale + delta_scale + 256) % 256;
127 }
128 if (next_scale != 0)
129 last_scale = next_scale;
130 }
131 }
132 }
133 }
134 }
135 // log2_max_frame_num and log2_max_pic_order_cnt_lsb are used with
136 // BitBuffer::ReadBits, which can read at most 32 bits at a time. We also have
137 // to avoid overflow when adding 4 to the on-wire golomb value, e.g., for evil
138 // input data, ReadExponentialGolomb might return 0xfffc.
139 const uint32_t kMaxLog2Minus4 = 32 - 4;
140
141 // log2_max_frame_num_minus4: ue(v)
142 uint32_t log2_max_frame_num_minus4;
143 if (!buffer->ReadExponentialGolomb(&log2_max_frame_num_minus4) ||
144 log2_max_frame_num_minus4 > kMaxLog2Minus4) {
145 return OptionalSps();
146 }
147 sps.log2_max_frame_num = log2_max_frame_num_minus4 + 4;
148
149 // pic_order_cnt_type: ue(v)
150 RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&sps.pic_order_cnt_type));
151 if (sps.pic_order_cnt_type == 0) {
152 // log2_max_pic_order_cnt_lsb_minus4: ue(v)
153 uint32_t log2_max_pic_order_cnt_lsb_minus4;
154 if (!buffer->ReadExponentialGolomb(&log2_max_pic_order_cnt_lsb_minus4) ||
155 log2_max_pic_order_cnt_lsb_minus4 > kMaxLog2Minus4) {
156 return OptionalSps();
157 }
158 sps.log2_max_pic_order_cnt_lsb = log2_max_pic_order_cnt_lsb_minus4 + 4;
159 } else if (sps.pic_order_cnt_type == 1) {
160 // delta_pic_order_always_zero_flag: u(1)
161 RETURN_EMPTY_ON_FAIL(
162 buffer->ReadBits(&sps.delta_pic_order_always_zero_flag, 1));
163 // offset_for_non_ref_pic: se(v)
164 RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
165 // offset_for_top_to_bottom_field: se(v)
166 RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
167 // num_ref_frames_in_pic_order_cnt_cycle: ue(v)
168 uint32_t num_ref_frames_in_pic_order_cnt_cycle;
169 RETURN_EMPTY_ON_FAIL(
170 buffer->ReadExponentialGolomb(&num_ref_frames_in_pic_order_cnt_cycle));
171 for (size_t i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; ++i) {
172 // offset_for_ref_frame[i]: se(v)
173 RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
174 }
175 }
176 // max_num_ref_frames: ue(v)
177 RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&sps.max_num_ref_frames));
178 // gaps_in_frame_num_value_allowed_flag: u(1)
179 RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1));
180 //
181 // IMPORTANT ONES! Now we're getting to resolution. First we read the pic
182 // width/height in macroblocks (16x16), which gives us the base resolution,
183 // and then we continue on until we hit the frame crop offsets, which are used
184 // to signify resolutions that aren't multiples of 16.
185 //
186 // pic_width_in_mbs_minus1: ue(v)
187 uint32_t pic_width_in_mbs_minus1;
188 RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&pic_width_in_mbs_minus1));
189 // pic_height_in_map_units_minus1: ue(v)
190 uint32_t pic_height_in_map_units_minus1;
191 RETURN_EMPTY_ON_FAIL(
192 buffer->ReadExponentialGolomb(&pic_height_in_map_units_minus1));
193 // frame_mbs_only_flag: u(1)
194 RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sps.frame_mbs_only_flag, 1));
195 if (!sps.frame_mbs_only_flag) {
196 // mb_adaptive_frame_field_flag: u(1)
197 RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1));
198 }
199 // direct_8x8_inference_flag: u(1)
200 RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1));
201 //
202 // MORE IMPORTANT ONES! Now we're at the frame crop information.
203 //
204 // frame_cropping_flag: u(1)
205 uint32_t frame_cropping_flag;
206 uint32_t frame_crop_left_offset = 0;
207 uint32_t frame_crop_right_offset = 0;
208 uint32_t frame_crop_top_offset = 0;
209 uint32_t frame_crop_bottom_offset = 0;
210 RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&frame_cropping_flag, 1));
211 if (frame_cropping_flag) {
212 // frame_crop_{left, right, top, bottom}_offset: ue(v)
213 RETURN_EMPTY_ON_FAIL(
214 buffer->ReadExponentialGolomb(&frame_crop_left_offset));
215 RETURN_EMPTY_ON_FAIL(
216 buffer->ReadExponentialGolomb(&frame_crop_right_offset));
217 RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&frame_crop_top_offset));
218 RETURN_EMPTY_ON_FAIL(
219 buffer->ReadExponentialGolomb(&frame_crop_bottom_offset));
220 }
221 // vui_parameters_present_flag: u(1)
222 RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sps.vui_params_present, 1));
223
224 // Far enough! We don't use the rest of the SPS.
225
226 // Start with the resolution determined by the pic_width/pic_height fields.
227 sps.width = 16 * (pic_width_in_mbs_minus1 + 1);
228 sps.height =
229 16 * (2 - sps.frame_mbs_only_flag) * (pic_height_in_map_units_minus1 + 1);
230
231 // Figure out the crop units in pixels. That's based on the chroma format's
232 // sampling, which is indicated by chroma_format_idc.
233 if (sps.separate_colour_plane_flag || chroma_format_idc == 0) {
234 frame_crop_bottom_offset *= (2 - sps.frame_mbs_only_flag);
235 frame_crop_top_offset *= (2 - sps.frame_mbs_only_flag);
236 } else if (!sps.separate_colour_plane_flag && chroma_format_idc > 0) {
237 // Width multipliers for formats 1 (4:2:0) and 2 (4:2:2).
238 if (chroma_format_idc == 1 || chroma_format_idc == 2) {
239 frame_crop_left_offset *= 2;
240 frame_crop_right_offset *= 2;
241 }
242 // Height multipliers for format 1 (4:2:0).
243 if (chroma_format_idc == 1) {
244 frame_crop_top_offset *= 2;
245 frame_crop_bottom_offset *= 2;
246 }
247 }
248 // Subtract the crop for each dimension.
249 sps.width -= (frame_crop_left_offset + frame_crop_right_offset);
250 sps.height -= (frame_crop_top_offset + frame_crop_bottom_offset);
251
252 return OptionalSps(sps);
253 }
254
255 } // namespace webrtc
256