• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS. All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <stdlib.h>
12 #include <time.h>
13 
14 #include "libyuv/cpu_id.h"
15 #include "libyuv/scale.h"
16 #include "../unit_test/unit_test.h"
17 
// Turns its argument into a string literal.
#define STRINGIZE(text) #text
// Builds a "file:line" string literal by adjacent-literal concatenation.
// |lineno| is macro-expanded before it reaches STRINGIZE, so passing __LINE__
// produces the line number rather than the text "__LINE__".
#define FILELINESTR(path, lineno) path ":" STRINGIZE(lineno)
20 
21 namespace libyuv {
22 
23 // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
TestFilter(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)24 static int TestFilter(int src_width, int src_height,
25                       int dst_width, int dst_height,
26                       FilterMode f, int benchmark_iterations,
27                       int disable_cpu_flags, int benchmark_cpu_info) {
28   if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
29     return 0;
30   }
31 
32   int i, j;
33   const int b = 0;  // 128 to test for padding/stride.
34   int src_width_uv = (Abs(src_width) + 1) >> 1;
35   int src_height_uv = (Abs(src_height) + 1) >> 1;
36 
37   int64 src_y_plane_size = (Abs(src_width) + b * 2) * (Abs(src_height) + b * 2);
38   int64 src_uv_plane_size = (src_width_uv + b * 2) * (src_height_uv + b * 2);
39 
40   int src_stride_y = b * 2 + Abs(src_width);
41   int src_stride_uv = b * 2 + src_width_uv;
42 
43   align_buffer_page_end(src_y, src_y_plane_size)
44   align_buffer_page_end(src_u, src_uv_plane_size)
45   align_buffer_page_end(src_v, src_uv_plane_size)
46   if (!src_y || !src_u || !src_v) {
47     printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
48     return 0;
49   }
50   MemRandomize(src_y, src_y_plane_size);
51   MemRandomize(src_u, src_uv_plane_size);
52   MemRandomize(src_v, src_uv_plane_size);
53 
54   int dst_width_uv = (dst_width + 1) >> 1;
55   int dst_height_uv = (dst_height + 1) >> 1;
56 
57   int64 dst_y_plane_size = (dst_width + b * 2) * (dst_height + b * 2);
58   int64 dst_uv_plane_size = (dst_width_uv + b * 2) * (dst_height_uv + b * 2);
59 
60   int dst_stride_y = b * 2 + dst_width;
61   int dst_stride_uv = b * 2 + dst_width_uv;
62 
63   align_buffer_page_end(dst_y_c, dst_y_plane_size)
64   align_buffer_page_end(dst_u_c, dst_uv_plane_size)
65   align_buffer_page_end(dst_v_c, dst_uv_plane_size)
66   align_buffer_page_end(dst_y_opt, dst_y_plane_size)
67   align_buffer_page_end(dst_u_opt, dst_uv_plane_size)
68   align_buffer_page_end(dst_v_opt, dst_uv_plane_size)
69   if (!dst_y_c || !dst_u_c || !dst_v_c ||
70       !dst_y_opt|| !dst_u_opt|| !dst_v_opt) {
71     printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
72     return 0;
73   }
74 
75   MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
76   double c_time = get_time();
77   I420Scale(src_y + (src_stride_y * b) + b, src_stride_y,
78             src_u + (src_stride_uv * b) + b, src_stride_uv,
79             src_v + (src_stride_uv * b) + b, src_stride_uv,
80             src_width, src_height,
81             dst_y_c + (dst_stride_y * b) + b, dst_stride_y,
82             dst_u_c + (dst_stride_uv * b) + b, dst_stride_uv,
83             dst_v_c + (dst_stride_uv * b) + b, dst_stride_uv,
84             dst_width, dst_height, f);
85   c_time = (get_time() - c_time);
86 
87   MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
88   double opt_time = get_time();
89   for (i = 0; i < benchmark_iterations; ++i) {
90     I420Scale(src_y + (src_stride_y * b) + b, src_stride_y,
91               src_u + (src_stride_uv * b) + b, src_stride_uv,
92               src_v + (src_stride_uv * b) + b, src_stride_uv,
93               src_width, src_height,
94               dst_y_opt + (dst_stride_y * b) + b, dst_stride_y,
95               dst_u_opt + (dst_stride_uv * b) + b, dst_stride_uv,
96               dst_v_opt + (dst_stride_uv * b) + b, dst_stride_uv,
97               dst_width, dst_height, f);
98   }
99   opt_time = (get_time() - opt_time) / benchmark_iterations;
100   // Report performance of C vs OPT
101   printf("filter %d - %8d us C - %8d us OPT\n",
102          f,
103          static_cast<int>(c_time * 1e6),
104          static_cast<int>(opt_time * 1e6));
105 
106   // C version may be a little off from the optimized. Order of
107   //  operations may introduce rounding somewhere. So do a difference
108   //  of the buffers and look to see that the max difference isn't
109   //  over 2.
110   int max_diff = 0;
111   for (i = b; i < (dst_height + b); ++i) {
112     for (j = b; j < (dst_width + b); ++j) {
113       int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] -
114                          dst_y_opt[(i * dst_stride_y) + j]);
115       if (abs_diff > max_diff) {
116         max_diff = abs_diff;
117       }
118     }
119   }
120 
121   for (i = b; i < (dst_height_uv + b); ++i) {
122     for (j = b; j < (dst_width_uv + b); ++j) {
123       int abs_diff = Abs(dst_u_c[(i * dst_stride_uv) + j] -
124                          dst_u_opt[(i * dst_stride_uv) + j]);
125       if (abs_diff > max_diff) {
126         max_diff = abs_diff;
127       }
128       abs_diff = Abs(dst_v_c[(i * dst_stride_uv) + j] -
129                      dst_v_opt[(i * dst_stride_uv) + j]);
130       if (abs_diff > max_diff) {
131         max_diff = abs_diff;
132       }
133     }
134   }
135 
136   free_aligned_buffer_page_end(dst_y_c)
137   free_aligned_buffer_page_end(dst_u_c)
138   free_aligned_buffer_page_end(dst_v_c)
139   free_aligned_buffer_page_end(dst_y_opt)
140   free_aligned_buffer_page_end(dst_u_opt)
141   free_aligned_buffer_page_end(dst_v_opt)
142 
143   free_aligned_buffer_page_end(src_y)
144   free_aligned_buffer_page_end(src_u)
145   free_aligned_buffer_page_end(src_v)
146 
147   return max_diff;
148 }
149 
150 // Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
151 // 0 = exact.
TestFilter_16(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations)152 static int TestFilter_16(int src_width, int src_height,
153                          int dst_width, int dst_height,
154                          FilterMode f, int benchmark_iterations) {
155   if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
156     return 0;
157   }
158 
159   int i, j;
160   const int b = 0;  // 128 to test for padding/stride.
161   int src_width_uv = (Abs(src_width) + 1) >> 1;
162   int src_height_uv = (Abs(src_height) + 1) >> 1;
163 
164   int64 src_y_plane_size = (Abs(src_width) + b * 2) *
165       (Abs(src_height) + b * 2);
166   int64 src_uv_plane_size = (src_width_uv + b * 2) * (src_height_uv + b * 2);
167 
168   int src_stride_y = b * 2 + Abs(src_width);
169   int src_stride_uv = b * 2 + src_width_uv;
170 
171   align_buffer_page_end(src_y, src_y_plane_size)
172   align_buffer_page_end(src_u, src_uv_plane_size)
173   align_buffer_page_end(src_v, src_uv_plane_size)
174   align_buffer_page_end(src_y_16, src_y_plane_size * 2)
175   align_buffer_page_end(src_u_16, src_uv_plane_size * 2)
176   align_buffer_page_end(src_v_16, src_uv_plane_size * 2)
177   uint16* p_src_y_16 = reinterpret_cast<uint16*>(src_y_16);
178   uint16* p_src_u_16 = reinterpret_cast<uint16*>(src_u_16);
179   uint16* p_src_v_16 = reinterpret_cast<uint16*>(src_v_16);
180 
181   MemRandomize(src_y, src_y_plane_size);
182   MemRandomize(src_u, src_uv_plane_size);
183   MemRandomize(src_v, src_uv_plane_size);
184 
185   for (i = b; i < src_height + b; ++i) {
186     for (j = b; j < src_width + b; ++j) {
187       p_src_y_16[(i * src_stride_y) + j] = src_y[(i * src_stride_y) + j];
188     }
189   }
190 
191   for (i = b; i < (src_height_uv + b); ++i) {
192     for (j = b; j < (src_width_uv + b); ++j) {
193       p_src_u_16[(i * src_stride_uv) + j] = src_u[(i * src_stride_uv) + j];
194       p_src_v_16[(i * src_stride_uv) + j] = src_v[(i * src_stride_uv) + j];
195     }
196   }
197 
198   int dst_width_uv = (dst_width + 1) >> 1;
199   int dst_height_uv = (dst_height + 1) >> 1;
200 
201   int dst_y_plane_size = (dst_width + b * 2) * (dst_height + b * 2);
202   int dst_uv_plane_size = (dst_width_uv + b * 2) * (dst_height_uv + b * 2);
203 
204   int dst_stride_y = b * 2 + dst_width;
205   int dst_stride_uv = b * 2 + dst_width_uv;
206 
207   align_buffer_page_end(dst_y_8, dst_y_plane_size)
208   align_buffer_page_end(dst_u_8, dst_uv_plane_size)
209   align_buffer_page_end(dst_v_8, dst_uv_plane_size)
210   align_buffer_page_end(dst_y_16, dst_y_plane_size * 2)
211   align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2)
212   align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2)
213 
214   uint16* p_dst_y_16 = reinterpret_cast<uint16*>(dst_y_16);
215   uint16* p_dst_u_16 = reinterpret_cast<uint16*>(dst_u_16);
216   uint16* p_dst_v_16 = reinterpret_cast<uint16*>(dst_v_16);
217 
218   I420Scale(src_y + (src_stride_y * b) + b, src_stride_y,
219             src_u + (src_stride_uv * b) + b, src_stride_uv,
220             src_v + (src_stride_uv * b) + b, src_stride_uv,
221             src_width, src_height,
222             dst_y_8 + (dst_stride_y * b) + b, dst_stride_y,
223             dst_u_8 + (dst_stride_uv * b) + b, dst_stride_uv,
224             dst_v_8 + (dst_stride_uv * b) + b, dst_stride_uv,
225             dst_width, dst_height, f);
226 
227   for (i = 0; i < benchmark_iterations; ++i) {
228     I420Scale_16(p_src_y_16 + (src_stride_y * b) + b, src_stride_y,
229                  p_src_u_16 + (src_stride_uv * b) + b, src_stride_uv,
230                  p_src_v_16 + (src_stride_uv * b) + b, src_stride_uv,
231                  src_width, src_height,
232                  p_dst_y_16 + (dst_stride_y * b) + b, dst_stride_y,
233                  p_dst_u_16 + (dst_stride_uv * b) + b, dst_stride_uv,
234                  p_dst_v_16 + (dst_stride_uv * b) + b, dst_stride_uv,
235                  dst_width, dst_height, f);
236   }
237 
238   // Expect an exact match
239   int max_diff = 0;
240   for (i = b; i < (dst_height + b); ++i) {
241     for (j = b; j < (dst_width + b); ++j) {
242       int abs_diff = Abs(dst_y_8[(i * dst_stride_y) + j] -
243                          p_dst_y_16[(i * dst_stride_y) + j]);
244       if (abs_diff > max_diff) {
245         max_diff = abs_diff;
246       }
247     }
248   }
249 
250   for (i = b; i < (dst_height_uv + b); ++i) {
251     for (j = b; j < (dst_width_uv + b); ++j) {
252       int abs_diff = Abs(dst_u_8[(i * dst_stride_uv) + j] -
253                          p_dst_u_16[(i * dst_stride_uv) + j]);
254       if (abs_diff > max_diff) {
255         max_diff = abs_diff;
256       }
257       abs_diff = Abs(dst_v_8[(i * dst_stride_uv) + j] -
258                      p_dst_v_16[(i * dst_stride_uv) + j]);
259       if (abs_diff > max_diff) {
260         max_diff = abs_diff;
261       }
262     }
263   }
264 
265   free_aligned_buffer_page_end(dst_y_8)
266   free_aligned_buffer_page_end(dst_u_8)
267   free_aligned_buffer_page_end(dst_v_8)
268   free_aligned_buffer_page_end(dst_y_16)
269   free_aligned_buffer_page_end(dst_u_16)
270   free_aligned_buffer_page_end(dst_v_16)
271 
272   free_aligned_buffer_page_end(src_y)
273   free_aligned_buffer_page_end(src_u)
274   free_aligned_buffer_page_end(src_v)
275   free_aligned_buffer_page_end(src_y_16)
276   free_aligned_buffer_page_end(src_u_16)
277   free_aligned_buffer_page_end(src_v_16)
278 
279   return max_diff;
280 }
281 
282 // The following adjustments in dimensions ensure the scale factor will be
283 // exactly achieved.
284 // 2 is chroma subsample
285 #define DX(x, nom, denom) static_cast<int>(((Abs(x) / nom + 1) / 2) * nom * 2)
286 #define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2)
287 
288 #define TEST_FACTOR1(name, filter, nom, denom, max_diff)                       \
289     TEST_F(LibYUVScaleTest, ScaleDownBy##name##_##filter) {                    \
290       int diff = TestFilter(SX(benchmark_width_, nom, denom),                  \
291                             SX(benchmark_height_, nom, denom),                 \
292                             DX(benchmark_width_, nom, denom),                  \
293                             DX(benchmark_height_, nom, denom),                 \
294                             kFilter##filter, benchmark_iterations_,            \
295                             disable_cpu_flags_, benchmark_cpu_info_);          \
296       EXPECT_LE(diff, max_diff);                                               \
297     }                                                                          \
298     TEST_F(LibYUVScaleTest, DISABLED_ScaleDownBy##name##_##filter##_16) {      \
299       int diff = TestFilter_16(SX(benchmark_width_, nom, denom),               \
300                                SX(benchmark_height_, nom, denom),              \
301                                DX(benchmark_width_, nom, denom),               \
302                                DX(benchmark_height_, nom, denom),              \
303                                kFilter##filter, benchmark_iterations_);        \
304       EXPECT_LE(diff, max_diff);                                               \
305     }
306 
307 // Test a scale factor with all 4 filters.  Expect unfiltered to be exact, but
308 // filtering is different fixed point implementations for SSSE3, Neon and C.
309 #define TEST_FACTOR(name, nom, denom, boxdiff)                                 \
310     TEST_FACTOR1(name, None, nom, denom, 0)                                    \
311     TEST_FACTOR1(name, Linear, nom, denom, 3)                                  \
312     TEST_FACTOR1(name, Bilinear, nom, denom, 3)                                \
313     TEST_FACTOR1(name, Box, nom, denom, boxdiff)
314 
315 TEST_FACTOR(2, 1, 2, 0)
316 TEST_FACTOR(4, 1, 4, 0)
317 TEST_FACTOR(8, 1, 8, 0)
318 TEST_FACTOR(3by4, 3, 4, 1)
319 TEST_FACTOR(3by8, 3, 8, 1)
320 TEST_FACTOR(3, 1, 3, 0)
321 #undef TEST_FACTOR1
322 #undef TEST_FACTOR
323 #undef SX
324 #undef DX
325 
326 #define TEST_SCALETO1(name, width, height, filter, max_diff)                   \
327     TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter) {           \
328       int diff = TestFilter(benchmark_width_, benchmark_height_,               \
329                             width, height,                                     \
330                             kFilter##filter, benchmark_iterations_,            \
331                             disable_cpu_flags_, benchmark_cpu_info_);          \
332       EXPECT_LE(diff, max_diff);                                               \
333     }                                                                          \
334     TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter) {         \
335       int diff = TestFilter(width, height,                                     \
336                             Abs(benchmark_width_), Abs(benchmark_height_),     \
337                             kFilter##filter, benchmark_iterations_,            \
338                             disable_cpu_flags_, benchmark_cpu_info_);          \
339       EXPECT_LE(diff, max_diff);                                               \
340     }                                                                          \
341     TEST_F(LibYUVScaleTest,                                                    \
342         DISABLED_##name##To##width##x##height##_##filter##_16) {               \
343       int diff = TestFilter_16(benchmark_width_, benchmark_height_,            \
344                                width, height,                                  \
345                                kFilter##filter, benchmark_iterations_);        \
346       EXPECT_LE(diff, max_diff);                                               \
347     }                                                                          \
348     TEST_F(LibYUVScaleTest,                                                    \
349         DISABLED_##name##From##width##x##height##_##filter##_16) {             \
350       int diff = TestFilter_16(width, height,                                  \
351                                Abs(benchmark_width_), Abs(benchmark_height_),  \
352                                kFilter##filter, benchmark_iterations_);        \
353       EXPECT_LE(diff, max_diff);                                               \
354     }
355 
356 // Test scale to a specified size with all 4 filters.
357 #define TEST_SCALETO(name, width, height)                                      \
358     TEST_SCALETO1(name, width, height, None, 0)                                \
359     TEST_SCALETO1(name, width, height, Linear, 0)                              \
360     TEST_SCALETO1(name, width, height, Bilinear, 0)                            \
361     TEST_SCALETO1(name, width, height, Box, 0)
362 
363 TEST_SCALETO(Scale, 1, 1)
364 TEST_SCALETO(Scale, 320, 240)
365 TEST_SCALETO(Scale, 352, 288)
366 TEST_SCALETO(Scale, 569, 480)
367 TEST_SCALETO(Scale, 640, 360)
368 TEST_SCALETO(Scale, 1280, 720)
369 #undef TEST_SCALETO1
370 #undef TEST_SCALETO
371 
372 }  // namespace libyuv
373