1 /*
2 * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "rtc_tools/frame_analyzer/video_temporal_aligner.h"
12
13 #include <algorithm>
14 #include <cmath>
15 #include <cstddef>
16 #include <deque>
17 #include <iterator>
18 #include <limits>
19 #include <vector>
20
21 #include "api/video/i420_buffer.h"
22 #include "api/video/video_frame_buffer.h"
23 #include "rtc_base/ref_counted_object.h"
24 #include "rtc_tools/frame_analyzer/video_quality_analysis.h"
25
26 namespace webrtc {
27 namespace test {
28
29 namespace {
30
// Number of reference frames inspected after the current best candidate when
// searching for the match of the next test frame. The search window restarts
// every time a better candidate is found, so the total distance covered can
// exceed this value; the search only ends once none of the frames in the
// window beats the current best. The SSIM is expected to increase as the
// search approaches the true match.
constexpr int kNumberOfFramesLookAhead = 60;
38
39 // Helper class that takes a video and generates an infinite looping video.
40 class LoopingVideo : public rtc::RefCountedObject<Video> {
41 public:
LoopingVideo(const rtc::scoped_refptr<Video> & video)42 explicit LoopingVideo(const rtc::scoped_refptr<Video>& video)
43 : video_(video) {}
44
width() const45 int width() const override { return video_->width(); }
height() const46 int height() const override { return video_->height(); }
number_of_frames() const47 size_t number_of_frames() const override {
48 return std::numeric_limits<size_t>::max();
49 }
50
GetFrame(size_t index) const51 rtc::scoped_refptr<I420BufferInterface> GetFrame(
52 size_t index) const override {
53 return video_->GetFrame(index % video_->number_of_frames());
54 }
55
56 private:
57 const rtc::scoped_refptr<Video> video_;
58 };
59
60 // Helper class that take a vector of frame indices and a video and produces a
61 // new video where the frames have been reshuffled.
62 class ReorderedVideo : public rtc::RefCountedObject<Video> {
63 public:
ReorderedVideo(const rtc::scoped_refptr<Video> & video,const std::vector<size_t> & indices)64 ReorderedVideo(const rtc::scoped_refptr<Video>& video,
65 const std::vector<size_t>& indices)
66 : video_(video), indices_(indices) {}
67
width() const68 int width() const override { return video_->width(); }
height() const69 int height() const override { return video_->height(); }
number_of_frames() const70 size_t number_of_frames() const override { return indices_.size(); }
71
GetFrame(size_t index) const72 rtc::scoped_refptr<I420BufferInterface> GetFrame(
73 size_t index) const override {
74 return video_->GetFrame(indices_.at(index));
75 }
76
77 private:
78 const rtc::scoped_refptr<Video> video_;
79 const std::vector<size_t> indices_;
80 };
81
82 // Helper class that takes a video and produces a downscaled video.
83 class DownscaledVideo : public rtc::RefCountedObject<Video> {
84 public:
DownscaledVideo(float scale_factor,const rtc::scoped_refptr<Video> & video)85 DownscaledVideo(float scale_factor, const rtc::scoped_refptr<Video>& video)
86 : downscaled_width_(
87 static_cast<int>(std::round(scale_factor * video->width()))),
88 downscaled_height_(
89 static_cast<int>(std::round(scale_factor * video->height()))),
90 video_(video) {}
91
width() const92 int width() const override { return downscaled_width_; }
height() const93 int height() const override { return downscaled_height_; }
number_of_frames() const94 size_t number_of_frames() const override {
95 return video_->number_of_frames();
96 }
97
GetFrame(size_t index) const98 rtc::scoped_refptr<I420BufferInterface> GetFrame(
99 size_t index) const override {
100 const rtc::scoped_refptr<I420BufferInterface> frame =
101 video_->GetFrame(index);
102 rtc::scoped_refptr<I420Buffer> downscaled_frame =
103 I420Buffer::Create(downscaled_width_, downscaled_height_);
104 downscaled_frame->ScaleFrom(*frame);
105 return downscaled_frame;
106 }
107
108 private:
109 const int downscaled_width_;
110 const int downscaled_height_;
111 const rtc::scoped_refptr<Video> video_;
112 };
113
114 // Helper class that takes a video and caches the latest frame access. This
115 // improves performance a lot since the original source is often from a file.
116 class CachedVideo : public rtc::RefCountedObject<Video> {
117 public:
CachedVideo(int max_cache_size,const rtc::scoped_refptr<Video> & video)118 CachedVideo(int max_cache_size, const rtc::scoped_refptr<Video>& video)
119 : max_cache_size_(max_cache_size), video_(video) {}
120
width() const121 int width() const override { return video_->width(); }
height() const122 int height() const override { return video_->height(); }
number_of_frames() const123 size_t number_of_frames() const override {
124 return video_->number_of_frames();
125 }
126
GetFrame(size_t index) const127 rtc::scoped_refptr<I420BufferInterface> GetFrame(
128 size_t index) const override {
129 for (const CachedFrame& cached_frame : cache_) {
130 if (cached_frame.index == index)
131 return cached_frame.frame;
132 }
133
134 rtc::scoped_refptr<I420BufferInterface> frame = video_->GetFrame(index);
135 cache_.push_front({index, frame});
136 if (cache_.size() > max_cache_size_)
137 cache_.pop_back();
138
139 return frame;
140 }
141
142 private:
143 struct CachedFrame {
144 size_t index;
145 rtc::scoped_refptr<I420BufferInterface> frame;
146 };
147
148 const size_t max_cache_size_;
149 const rtc::scoped_refptr<Video> video_;
150 mutable std::deque<CachedFrame> cache_;
151 };
152
153 // Try matching the test frame against all frames in the reference video and
154 // return the index of the best matching frame.
FindBestMatch(const rtc::scoped_refptr<I420BufferInterface> & test_frame,const Video & reference_video)155 size_t FindBestMatch(const rtc::scoped_refptr<I420BufferInterface>& test_frame,
156 const Video& reference_video) {
157 std::vector<double> ssim;
158 for (const auto& ref_frame : reference_video)
159 ssim.push_back(Ssim(test_frame, ref_frame));
160 return std::distance(ssim.begin(),
161 std::max_element(ssim.begin(), ssim.end()));
162 }
163
164 // Find and return the index of the frame matching the test frame. The search
165 // starts at the starting index and continues until there is no better match
166 // within the next kNumberOfFramesLookAhead frames.
FindNextMatch(const rtc::scoped_refptr<I420BufferInterface> & test_frame,const Video & reference_video,size_t start_index)167 size_t FindNextMatch(const rtc::scoped_refptr<I420BufferInterface>& test_frame,
168 const Video& reference_video,
169 size_t start_index) {
170 const double start_ssim =
171 Ssim(test_frame, reference_video.GetFrame(start_index));
172 for (int i = 1; i < kNumberOfFramesLookAhead; ++i) {
173 const size_t next_index = start_index + i;
174 // If we find a better match, restart the search at that point.
175 if (start_ssim < Ssim(test_frame, reference_video.GetFrame(next_index)))
176 return FindNextMatch(test_frame, reference_video, next_index);
177 }
178 // The starting index was the best match.
179 return start_index;
180 }
181
182 } // namespace
183
FindMatchingFrameIndices(const rtc::scoped_refptr<Video> & reference_video,const rtc::scoped_refptr<Video> & test_video)184 std::vector<size_t> FindMatchingFrameIndices(
185 const rtc::scoped_refptr<Video>& reference_video,
186 const rtc::scoped_refptr<Video>& test_video) {
187 // This is done to get a 10x speedup. We don't need the full resolution in
188 // order to match frames, and we should limit file access and not read the
189 // same memory tens of times.
190 const float kScaleFactor = 0.25f;
191 const rtc::scoped_refptr<Video> cached_downscaled_reference_video =
192 new CachedVideo(kNumberOfFramesLookAhead,
193 new DownscaledVideo(kScaleFactor, reference_video));
194 const rtc::scoped_refptr<Video> downscaled_test_video =
195 new DownscaledVideo(kScaleFactor, test_video);
196
197 // Assume the video is looping around.
198 const rtc::scoped_refptr<Video> looping_reference_video =
199 new LoopingVideo(cached_downscaled_reference_video);
200
201 std::vector<size_t> match_indices;
202 for (const rtc::scoped_refptr<I420BufferInterface>& test_frame :
203 *downscaled_test_video) {
204 if (match_indices.empty()) {
205 // First frame.
206 match_indices.push_back(
207 FindBestMatch(test_frame, *cached_downscaled_reference_video));
208 } else {
209 match_indices.push_back(FindNextMatch(
210 test_frame, *looping_reference_video, match_indices.back()));
211 }
212 }
213
214 return match_indices;
215 }
216
// Returns a video whose i:th frame is frame indices[i] of `video`. The source
// is wrapped in a looping video first, so indices beyond the last frame of
// `video` wrap around to its beginning.
rtc::scoped_refptr<Video> ReorderVideo(const rtc::scoped_refptr<Video>& video,
                                       const std::vector<size_t>& indices) {
  const rtc::scoped_refptr<Video> looping_video = new LoopingVideo(video);
  return new ReorderedVideo(looping_video, indices);
}
221
// Matches every frame of the test video against the reference video and
// returns the reference video reordered according to those matches.
rtc::scoped_refptr<Video> GenerateAlignedReferenceVideo(
    const rtc::scoped_refptr<Video>& reference_video,
    const rtc::scoped_refptr<Video>& test_video) {
  const std::vector<size_t> matching_indices =
      FindMatchingFrameIndices(reference_video, test_video);
  return GenerateAlignedReferenceVideo(reference_video, matching_indices);
}
228
// Returns the reference video reordered according to the given frame indices.
// This simply forwards to ReorderVideo().
rtc::scoped_refptr<Video> GenerateAlignedReferenceVideo(
    const rtc::scoped_refptr<Video>& reference_video,
    const std::vector<size_t>& indices) {
  rtc::scoped_refptr<Video> aligned_video =
      ReorderVideo(reference_video, indices);
  return aligned_video;
}
234
235 } // namespace test
236 } // namespace webrtc
237