1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "modules/video_coding/codecs/test/videocodec_test_stats_impl.h"
12
13 #include <algorithm>
14 #include <cmath>
15 #include <iterator>
16 #include <limits>
17 #include <numeric>
18
19 #include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
20 #include "rtc_base/checks.h"
21 #include "rtc_base/numerics/running_statistics.h"
22 #include "rtc_base/strings/string_builder.h"
23
24 namespace webrtc {
25 namespace test {
26
27 using FrameStatistics = VideoCodecTestStats::FrameStatistics;
28 using VideoStatistics = VideoCodecTestStats::VideoStatistics;
29
namespace {
// Largest deviation (in percent) between the measured and the target bitrate
// for which the target bitrate is still considered reached.
constexpr int kMaxBitrateMismatchPercent = 20;
}  // namespace
33
// Construction and destruction need no custom logic here; both special
// members are explicitly defaulted.
34 VideoCodecTestStatsImpl::VideoCodecTestStatsImpl() = default;
35 VideoCodecTestStatsImpl::~VideoCodecTestStatsImpl() = default;
36
AddFrame(const FrameStatistics & frame_stat)37 void VideoCodecTestStatsImpl::AddFrame(const FrameStatistics& frame_stat) {
38 const size_t timestamp = frame_stat.rtp_timestamp;
39 const size_t layer_idx = frame_stat.spatial_idx;
40 RTC_DCHECK(rtp_timestamp_to_frame_num_[layer_idx].find(timestamp) ==
41 rtp_timestamp_to_frame_num_[layer_idx].end());
42 rtp_timestamp_to_frame_num_[layer_idx][timestamp] = frame_stat.frame_number;
43 layer_stats_[layer_idx].push_back(frame_stat);
44 }
45
GetFrame(size_t frame_num,size_t layer_idx)46 FrameStatistics* VideoCodecTestStatsImpl::GetFrame(size_t frame_num,
47 size_t layer_idx) {
48 RTC_CHECK_LT(frame_num, layer_stats_[layer_idx].size());
49 return &layer_stats_[layer_idx][frame_num];
50 }
51
GetFrameWithTimestamp(size_t timestamp,size_t layer_idx)52 FrameStatistics* VideoCodecTestStatsImpl::GetFrameWithTimestamp(
53 size_t timestamp,
54 size_t layer_idx) {
55 RTC_DCHECK(rtp_timestamp_to_frame_num_[layer_idx].find(timestamp) !=
56 rtp_timestamp_to_frame_num_[layer_idx].end());
57
58 return GetFrame(rtp_timestamp_to_frame_num_[layer_idx][timestamp], layer_idx);
59 }
60
GetFrameStatistics()61 std::vector<FrameStatistics> VideoCodecTestStatsImpl::GetFrameStatistics() {
62 size_t capacity = 0;
63 for (const auto& layer_stat : layer_stats_) {
64 capacity += layer_stat.second.size();
65 }
66
67 std::vector<FrameStatistics> frame_statistics;
68 frame_statistics.reserve(capacity);
69 for (const auto& layer_stat : layer_stats_) {
70 std::copy(layer_stat.second.cbegin(), layer_stat.second.cend(),
71 std::back_inserter(frame_statistics));
72 }
73
74 return frame_statistics;
75 }
76
77 std::vector<VideoStatistics>
SliceAndCalcLayerVideoStatistic(size_t first_frame_num,size_t last_frame_num)78 VideoCodecTestStatsImpl::SliceAndCalcLayerVideoStatistic(
79 size_t first_frame_num,
80 size_t last_frame_num) {
81 std::vector<VideoStatistics> layer_stats;
82
83 size_t num_spatial_layers = 0;
84 size_t num_temporal_layers = 0;
85 GetNumberOfEncodedLayers(first_frame_num, last_frame_num, &num_spatial_layers,
86 &num_temporal_layers);
87 RTC_CHECK_GT(num_spatial_layers, 0);
88 RTC_CHECK_GT(num_temporal_layers, 0);
89
90 for (size_t spatial_idx = 0; spatial_idx < num_spatial_layers;
91 ++spatial_idx) {
92 for (size_t temporal_idx = 0; temporal_idx < num_temporal_layers;
93 ++temporal_idx) {
94 VideoStatistics layer_stat = SliceAndCalcVideoStatistic(
95 first_frame_num, last_frame_num, spatial_idx, temporal_idx, false);
96 layer_stats.push_back(layer_stat);
97 }
98 }
99
100 return layer_stats;
101 }
102
SliceAndCalcAggregatedVideoStatistic(size_t first_frame_num,size_t last_frame_num)103 VideoStatistics VideoCodecTestStatsImpl::SliceAndCalcAggregatedVideoStatistic(
104 size_t first_frame_num,
105 size_t last_frame_num) {
106 size_t num_spatial_layers = 0;
107 size_t num_temporal_layers = 0;
108 GetNumberOfEncodedLayers(first_frame_num, last_frame_num, &num_spatial_layers,
109 &num_temporal_layers);
110 RTC_CHECK_GT(num_spatial_layers, 0);
111 RTC_CHECK_GT(num_temporal_layers, 0);
112
113 return SliceAndCalcVideoStatistic(first_frame_num, last_frame_num,
114 num_spatial_layers - 1,
115 num_temporal_layers - 1, true);
116 }
117
Size(size_t spatial_idx)118 size_t VideoCodecTestStatsImpl::Size(size_t spatial_idx) {
119 return layer_stats_[spatial_idx].size();
120 }
121
Clear()122 void VideoCodecTestStatsImpl::Clear() {
123 layer_stats_.clear();
124 rtp_timestamp_to_frame_num_.clear();
125 }
126
AggregateFrameStatistic(size_t frame_num,size_t spatial_idx,bool aggregate_independent_layers)127 FrameStatistics VideoCodecTestStatsImpl::AggregateFrameStatistic(
128 size_t frame_num,
129 size_t spatial_idx,
130 bool aggregate_independent_layers) {
131 FrameStatistics frame_stat = *GetFrame(frame_num, spatial_idx);
132 bool inter_layer_predicted = frame_stat.inter_layer_predicted;
133 while (spatial_idx-- > 0) {
134 if (aggregate_independent_layers || inter_layer_predicted) {
135 FrameStatistics* base_frame_stat = GetFrame(frame_num, spatial_idx);
136 frame_stat.length_bytes += base_frame_stat->length_bytes;
137 frame_stat.target_bitrate_kbps += base_frame_stat->target_bitrate_kbps;
138
139 inter_layer_predicted = base_frame_stat->inter_layer_predicted;
140 }
141 }
142
143 return frame_stat;
144 }
145
CalcLayerTargetBitrateKbps(size_t first_frame_num,size_t last_frame_num,size_t spatial_idx,size_t temporal_idx,bool aggregate_independent_layers)146 size_t VideoCodecTestStatsImpl::CalcLayerTargetBitrateKbps(
147 size_t first_frame_num,
148 size_t last_frame_num,
149 size_t spatial_idx,
150 size_t temporal_idx,
151 bool aggregate_independent_layers) {
152 size_t target_bitrate_kbps = 0;
153
154 // We don't know if superframe includes all required spatial layers because
155 // of possible frame drops. Run through all frames in specified range, find
156 // and return maximum target bitrate. Assume that target bitrate in frame
157 // statistic is specified per temporal layer.
158 for (size_t frame_num = first_frame_num; frame_num <= last_frame_num;
159 ++frame_num) {
160 FrameStatistics superframe = AggregateFrameStatistic(
161 frame_num, spatial_idx, aggregate_independent_layers);
162
163 if (superframe.temporal_idx <= temporal_idx) {
164 target_bitrate_kbps =
165 std::max(target_bitrate_kbps, superframe.target_bitrate_kbps);
166 }
167 }
168
169 RTC_DCHECK_GT(target_bitrate_kbps, 0);
170 return target_bitrate_kbps;
171 }
172
SliceAndCalcVideoStatistic(size_t first_frame_num,size_t last_frame_num,size_t spatial_idx,size_t temporal_idx,bool aggregate_independent_layers)173 VideoStatistics VideoCodecTestStatsImpl::SliceAndCalcVideoStatistic(
174 size_t first_frame_num,
175 size_t last_frame_num,
176 size_t spatial_idx,
177 size_t temporal_idx,
178 bool aggregate_independent_layers) {
179 VideoStatistics video_stat;
180
181 float buffer_level_bits = 0.0f;
182 RunningStatistics<float> buffer_level_sec;
183
184 RunningStatistics<size_t> key_frame_size_bytes;
185 RunningStatistics<size_t> delta_frame_size_bytes;
186
187 RunningStatistics<size_t> frame_encoding_time_us;
188 RunningStatistics<size_t> frame_decoding_time_us;
189
190 RunningStatistics<float> psnr_y;
191 RunningStatistics<float> psnr_u;
192 RunningStatistics<float> psnr_v;
193 RunningStatistics<float> psnr;
194 RunningStatistics<float> ssim;
195 RunningStatistics<int> qp;
196
197 size_t rtp_timestamp_first_frame = 0;
198 size_t rtp_timestamp_prev_frame = 0;
199
200 FrameStatistics last_successfully_decoded_frame(0, 0, 0);
201
202 const size_t target_bitrate_kbps =
203 CalcLayerTargetBitrateKbps(first_frame_num, last_frame_num, spatial_idx,
204 temporal_idx, aggregate_independent_layers);
205 RTC_CHECK_GT(target_bitrate_kbps, 0); // We divide by |target_bitrate_kbps|.
206
207 for (size_t frame_num = first_frame_num; frame_num <= last_frame_num;
208 ++frame_num) {
209 FrameStatistics frame_stat = AggregateFrameStatistic(
210 frame_num, spatial_idx, aggregate_independent_layers);
211
212 float time_since_first_frame_sec =
213 1.0f * (frame_stat.rtp_timestamp - rtp_timestamp_first_frame) /
214 kVideoPayloadTypeFrequency;
215 float time_since_prev_frame_sec =
216 1.0f * (frame_stat.rtp_timestamp - rtp_timestamp_prev_frame) /
217 kVideoPayloadTypeFrequency;
218
219 if (frame_stat.temporal_idx > temporal_idx) {
220 continue;
221 }
222
223 buffer_level_bits -= time_since_prev_frame_sec * 1000 * target_bitrate_kbps;
224 buffer_level_bits = std::max(0.0f, buffer_level_bits);
225 buffer_level_bits += 8.0 * frame_stat.length_bytes;
226 buffer_level_sec.AddSample(buffer_level_bits /
227 (1000 * target_bitrate_kbps));
228
229 video_stat.length_bytes += frame_stat.length_bytes;
230
231 if (frame_stat.encoding_successful) {
232 ++video_stat.num_encoded_frames;
233
234 if (frame_stat.frame_type == VideoFrameType::kVideoFrameKey) {
235 key_frame_size_bytes.AddSample(frame_stat.length_bytes);
236 ++video_stat.num_key_frames;
237 } else {
238 delta_frame_size_bytes.AddSample(frame_stat.length_bytes);
239 }
240
241 frame_encoding_time_us.AddSample(frame_stat.encode_time_us);
242 qp.AddSample(frame_stat.qp);
243
244 video_stat.max_nalu_size_bytes = std::max(video_stat.max_nalu_size_bytes,
245 frame_stat.max_nalu_size_bytes);
246 }
247
248 if (frame_stat.decoding_successful) {
249 ++video_stat.num_decoded_frames;
250
251 video_stat.width = std::max(video_stat.width, frame_stat.decoded_width);
252 video_stat.height =
253 std::max(video_stat.height, frame_stat.decoded_height);
254
255 psnr_y.AddSample(frame_stat.psnr_y);
256 psnr_u.AddSample(frame_stat.psnr_u);
257 psnr_v.AddSample(frame_stat.psnr_v);
258 psnr.AddSample(frame_stat.psnr);
259 ssim.AddSample(frame_stat.ssim);
260
261 if (video_stat.num_decoded_frames > 1) {
262 if (last_successfully_decoded_frame.decoded_width !=
263 frame_stat.decoded_width ||
264 last_successfully_decoded_frame.decoded_height !=
265 frame_stat.decoded_height) {
266 ++video_stat.num_spatial_resizes;
267 }
268 }
269
270 frame_decoding_time_us.AddSample(frame_stat.decode_time_us);
271 last_successfully_decoded_frame = frame_stat;
272 }
273
274 if (video_stat.num_input_frames > 0) {
275 if (video_stat.time_to_reach_target_bitrate_sec == 0.0f) {
276 RTC_CHECK_GT(time_since_first_frame_sec, 0);
277 const float curr_kbps =
278 8.0 * video_stat.length_bytes / 1000 / time_since_first_frame_sec;
279 const float bitrate_mismatch_percent =
280 100 * std::fabs(curr_kbps - target_bitrate_kbps) /
281 target_bitrate_kbps;
282 if (bitrate_mismatch_percent < kMaxBitrateMismatchPercent) {
283 video_stat.time_to_reach_target_bitrate_sec =
284 time_since_first_frame_sec;
285 }
286 }
287 }
288
289 rtp_timestamp_prev_frame = frame_stat.rtp_timestamp;
290 if (video_stat.num_input_frames == 0) {
291 rtp_timestamp_first_frame = frame_stat.rtp_timestamp;
292 }
293
294 ++video_stat.num_input_frames;
295 }
296
297 const size_t num_frames = last_frame_num - first_frame_num + 1;
298 const size_t timestamp_delta =
299 GetFrame(first_frame_num + 1, spatial_idx)->rtp_timestamp -
300 GetFrame(first_frame_num, spatial_idx)->rtp_timestamp;
301 RTC_CHECK_GT(timestamp_delta, 0);
302 const float input_framerate_fps =
303 1.0 * kVideoPayloadTypeFrequency / timestamp_delta;
304 RTC_CHECK_GT(input_framerate_fps, 0);
305 const float duration_sec = num_frames / input_framerate_fps;
306
307 video_stat.target_bitrate_kbps = target_bitrate_kbps;
308 video_stat.input_framerate_fps = input_framerate_fps;
309
310 video_stat.spatial_idx = spatial_idx;
311 video_stat.temporal_idx = temporal_idx;
312
313 RTC_CHECK_GT(duration_sec, 0);
314 video_stat.bitrate_kbps =
315 static_cast<size_t>(8 * video_stat.length_bytes / 1000 / duration_sec);
316 video_stat.framerate_fps = video_stat.num_encoded_frames / duration_sec;
317
318 // http://bugs.webrtc.org/10400: On Windows, we only get millisecond
319 // granularity in the frame encode/decode timing measurements.
320 // So we need to softly avoid a div-by-zero here.
321 const float mean_encode_time_us =
322 frame_encoding_time_us.GetMean().value_or(0);
323 video_stat.enc_speed_fps = mean_encode_time_us > 0.0f
324 ? 1000000.0f / mean_encode_time_us
325 : std::numeric_limits<float>::max();
326 const float mean_decode_time_us =
327 frame_decoding_time_us.GetMean().value_or(0);
328 video_stat.dec_speed_fps = mean_decode_time_us > 0.0f
329 ? 1000000.0f / mean_decode_time_us
330 : std::numeric_limits<float>::max();
331
332 auto MaxDelaySec =
333 [target_bitrate_kbps](const RunningStatistics<size_t>& stats) {
334 return 8 * stats.GetMax().value_or(0) / 1000 / target_bitrate_kbps;
335 };
336
337 video_stat.avg_delay_sec = buffer_level_sec.GetMean().value_or(0);
338 video_stat.max_key_frame_delay_sec = MaxDelaySec(key_frame_size_bytes);
339 video_stat.max_delta_frame_delay_sec = MaxDelaySec(key_frame_size_bytes);
340
341 video_stat.avg_key_frame_size_bytes =
342 key_frame_size_bytes.GetMean().value_or(0);
343 video_stat.avg_delta_frame_size_bytes =
344 delta_frame_size_bytes.GetMean().value_or(0);
345 video_stat.avg_qp = qp.GetMean().value_or(0);
346
347 video_stat.avg_psnr_y = psnr_y.GetMean().value_or(0);
348 video_stat.avg_psnr_u = psnr_u.GetMean().value_or(0);
349 video_stat.avg_psnr_v = psnr_v.GetMean().value_or(0);
350 video_stat.avg_psnr = psnr.GetMean().value_or(0);
351 video_stat.min_psnr =
352 psnr.GetMin().value_or(std::numeric_limits<float>::max());
353 video_stat.avg_ssim = ssim.GetMean().value_or(0);
354 video_stat.min_ssim =
355 ssim.GetMin().value_or(std::numeric_limits<float>::max());
356
357 return video_stat;
358 }
359
GetNumberOfEncodedLayers(size_t first_frame_num,size_t last_frame_num,size_t * num_encoded_spatial_layers,size_t * num_encoded_temporal_layers)360 void VideoCodecTestStatsImpl::GetNumberOfEncodedLayers(
361 size_t first_frame_num,
362 size_t last_frame_num,
363 size_t* num_encoded_spatial_layers,
364 size_t* num_encoded_temporal_layers) {
365 *num_encoded_spatial_layers = 0;
366 *num_encoded_temporal_layers = 0;
367
368 const size_t num_spatial_layers = layer_stats_.size();
369
370 for (size_t frame_num = first_frame_num; frame_num <= last_frame_num;
371 ++frame_num) {
372 for (size_t spatial_idx = 0; spatial_idx < num_spatial_layers;
373 ++spatial_idx) {
374 FrameStatistics* frame_stat = GetFrame(frame_num, spatial_idx);
375 if (frame_stat->encoding_successful) {
376 *num_encoded_spatial_layers =
377 std::max(*num_encoded_spatial_layers, frame_stat->spatial_idx + 1);
378 *num_encoded_temporal_layers = std::max(*num_encoded_temporal_layers,
379 frame_stat->temporal_idx + 1);
380 }
381 }
382 }
383 }
384
385 } // namespace test
386 } // namespace webrtc
387