• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS. All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <stdlib.h>
12 #include <time.h>
13 
14 #include "../unit_test/unit_test.h"
15 #include "libyuv/cpu_id.h"
16 #include "libyuv/scale.h"
17 #include "libyuv/scale_row.h"  // For ScaleRowDown2Box_Odd_C
18 
// Turns the literal spelling of its argument into a string literal.
#define STRINGIZE(token) #token
// Builds the string literal "<file>:<line>".  |line_no| is a plain macro
// argument here (not an operand of #), so something like __LINE__ is expanded
// to its number before STRINGIZE sees it.
#define FILELINESTR(file_name, line_no) file_name ":" STRINGIZE(line_no)
21 
22 namespace libyuv {
23 
24 // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
I420TestFilter(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)25 static int I420TestFilter(int src_width,
26                           int src_height,
27                           int dst_width,
28                           int dst_height,
29                           FilterMode f,
30                           int benchmark_iterations,
31                           int disable_cpu_flags,
32                           int benchmark_cpu_info) {
33   if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
34     return 0;
35   }
36 
37   int i, j;
38   int src_width_uv = (Abs(src_width) + 1) >> 1;
39   int src_height_uv = (Abs(src_height) + 1) >> 1;
40 
41   int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
42   int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
43 
44   int src_stride_y = Abs(src_width);
45   int src_stride_uv = src_width_uv;
46 
47   align_buffer_page_end(src_y, src_y_plane_size);
48   align_buffer_page_end(src_u, src_uv_plane_size);
49   align_buffer_page_end(src_v, src_uv_plane_size);
50   if (!src_y || !src_u || !src_v) {
51     printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
52     return 0;
53   }
54   MemRandomize(src_y, src_y_plane_size);
55   MemRandomize(src_u, src_uv_plane_size);
56   MemRandomize(src_v, src_uv_plane_size);
57 
58   int dst_width_uv = (dst_width + 1) >> 1;
59   int dst_height_uv = (dst_height + 1) >> 1;
60 
61   int64_t dst_y_plane_size = (dst_width) * (dst_height);
62   int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
63 
64   int dst_stride_y = dst_width;
65   int dst_stride_uv = dst_width_uv;
66 
67   align_buffer_page_end(dst_y_c, dst_y_plane_size);
68   align_buffer_page_end(dst_u_c, dst_uv_plane_size);
69   align_buffer_page_end(dst_v_c, dst_uv_plane_size);
70   align_buffer_page_end(dst_y_opt, dst_y_plane_size);
71   align_buffer_page_end(dst_u_opt, dst_uv_plane_size);
72   align_buffer_page_end(dst_v_opt, dst_uv_plane_size);
73   if (!dst_y_c || !dst_u_c || !dst_v_c || !dst_y_opt || !dst_u_opt ||
74       !dst_v_opt) {
75     printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
76     return 0;
77   }
78 
79   MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
80   double c_time = get_time();
81   I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
82             src_width, src_height, dst_y_c, dst_stride_y, dst_u_c,
83             dst_stride_uv, dst_v_c, dst_stride_uv, dst_width, dst_height, f);
84   c_time = (get_time() - c_time);
85 
86   MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
87   double opt_time = get_time();
88   for (i = 0; i < benchmark_iterations; ++i) {
89     I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
90               src_width, src_height, dst_y_opt, dst_stride_y, dst_u_opt,
91               dst_stride_uv, dst_v_opt, dst_stride_uv, dst_width, dst_height,
92               f);
93   }
94   opt_time = (get_time() - opt_time) / benchmark_iterations;
95   // Report performance of C vs OPT.
96   printf("filter %d - %8d us C - %8d us OPT\n", f,
97          static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
98 
99   // C version may be a little off from the optimized. Order of
100   //  operations may introduce rounding somewhere. So do a difference
101   //  of the buffers and look to see that the max difference is not
102   //  over 3.
103   int max_diff = 0;
104   for (i = 0; i < (dst_height); ++i) {
105     for (j = 0; j < (dst_width); ++j) {
106       int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] -
107                          dst_y_opt[(i * dst_stride_y) + j]);
108       if (abs_diff > max_diff) {
109         max_diff = abs_diff;
110       }
111     }
112   }
113 
114   for (i = 0; i < (dst_height_uv); ++i) {
115     for (j = 0; j < (dst_width_uv); ++j) {
116       int abs_diff = Abs(dst_u_c[(i * dst_stride_uv) + j] -
117                          dst_u_opt[(i * dst_stride_uv) + j]);
118       if (abs_diff > max_diff) {
119         max_diff = abs_diff;
120       }
121       abs_diff = Abs(dst_v_c[(i * dst_stride_uv) + j] -
122                      dst_v_opt[(i * dst_stride_uv) + j]);
123       if (abs_diff > max_diff) {
124         max_diff = abs_diff;
125       }
126     }
127   }
128 
129   free_aligned_buffer_page_end(dst_y_c);
130   free_aligned_buffer_page_end(dst_u_c);
131   free_aligned_buffer_page_end(dst_v_c);
132   free_aligned_buffer_page_end(dst_y_opt);
133   free_aligned_buffer_page_end(dst_u_opt);
134   free_aligned_buffer_page_end(dst_v_opt);
135   free_aligned_buffer_page_end(src_y);
136   free_aligned_buffer_page_end(src_u);
137   free_aligned_buffer_page_end(src_v);
138 
139   return max_diff;
140 }
141 
142 // Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
143 // 0 = exact.
I420TestFilter_16(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)144 static int I420TestFilter_16(int src_width,
145                              int src_height,
146                              int dst_width,
147                              int dst_height,
148                              FilterMode f,
149                              int benchmark_iterations,
150                              int disable_cpu_flags,
151                              int benchmark_cpu_info) {
152   if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
153     return 0;
154   }
155 
156   int i;
157   int src_width_uv = (Abs(src_width) + 1) >> 1;
158   int src_height_uv = (Abs(src_height) + 1) >> 1;
159 
160   int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
161   int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
162 
163   int src_stride_y = Abs(src_width);
164   int src_stride_uv = src_width_uv;
165 
166   align_buffer_page_end(src_y, src_y_plane_size);
167   align_buffer_page_end(src_u, src_uv_plane_size);
168   align_buffer_page_end(src_v, src_uv_plane_size);
169   align_buffer_page_end(src_y_16, src_y_plane_size * 2);
170   align_buffer_page_end(src_u_16, src_uv_plane_size * 2);
171   align_buffer_page_end(src_v_16, src_uv_plane_size * 2);
172   if (!src_y || !src_u || !src_v || !src_y_16 || !src_u_16 || !src_v_16) {
173     printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
174     return 0;
175   }
176   uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
177   uint16_t* p_src_u_16 = reinterpret_cast<uint16_t*>(src_u_16);
178   uint16_t* p_src_v_16 = reinterpret_cast<uint16_t*>(src_v_16);
179 
180   MemRandomize(src_y, src_y_plane_size);
181   MemRandomize(src_u, src_uv_plane_size);
182   MemRandomize(src_v, src_uv_plane_size);
183 
184   for (i = 0; i < src_y_plane_size; ++i) {
185     p_src_y_16[i] = src_y[i];
186   }
187   for (i = 0; i < src_uv_plane_size; ++i) {
188     p_src_u_16[i] = src_u[i];
189     p_src_v_16[i] = src_v[i];
190   }
191 
192   int dst_width_uv = (dst_width + 1) >> 1;
193   int dst_height_uv = (dst_height + 1) >> 1;
194 
195   int dst_y_plane_size = (dst_width) * (dst_height);
196   int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
197 
198   int dst_stride_y = dst_width;
199   int dst_stride_uv = dst_width_uv;
200 
201   align_buffer_page_end(dst_y_8, dst_y_plane_size);
202   align_buffer_page_end(dst_u_8, dst_uv_plane_size);
203   align_buffer_page_end(dst_v_8, dst_uv_plane_size);
204   align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
205   align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2);
206   align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2);
207 
208   uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
209   uint16_t* p_dst_u_16 = reinterpret_cast<uint16_t*>(dst_u_16);
210   uint16_t* p_dst_v_16 = reinterpret_cast<uint16_t*>(dst_v_16);
211 
212   MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
213   I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
214             src_width, src_height, dst_y_8, dst_stride_y, dst_u_8,
215             dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f);
216   MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
217   for (i = 0; i < benchmark_iterations; ++i) {
218     I420Scale_16(p_src_y_16, src_stride_y, p_src_u_16, src_stride_uv,
219                  p_src_v_16, src_stride_uv, src_width, src_height, p_dst_y_16,
220                  dst_stride_y, p_dst_u_16, dst_stride_uv, p_dst_v_16,
221                  dst_stride_uv, dst_width, dst_height, f);
222   }
223 
224   // Expect an exact match.
225   int max_diff = 0;
226   for (i = 0; i < dst_y_plane_size; ++i) {
227     int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
228     if (abs_diff > max_diff) {
229       max_diff = abs_diff;
230     }
231   }
232   for (i = 0; i < dst_uv_plane_size; ++i) {
233     int abs_diff = Abs(dst_u_8[i] - p_dst_u_16[i]);
234     if (abs_diff > max_diff) {
235       max_diff = abs_diff;
236     }
237     abs_diff = Abs(dst_v_8[i] - p_dst_v_16[i]);
238     if (abs_diff > max_diff) {
239       max_diff = abs_diff;
240     }
241   }
242 
243   free_aligned_buffer_page_end(dst_y_8);
244   free_aligned_buffer_page_end(dst_u_8);
245   free_aligned_buffer_page_end(dst_v_8);
246   free_aligned_buffer_page_end(dst_y_16);
247   free_aligned_buffer_page_end(dst_u_16);
248   free_aligned_buffer_page_end(dst_v_16);
249   free_aligned_buffer_page_end(src_y);
250   free_aligned_buffer_page_end(src_u);
251   free_aligned_buffer_page_end(src_v);
252   free_aligned_buffer_page_end(src_y_16);
253   free_aligned_buffer_page_end(src_u_16);
254   free_aligned_buffer_page_end(src_v_16);
255 
256   return max_diff;
257 }
258 
259 // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
I444TestFilter(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)260 static int I444TestFilter(int src_width,
261                           int src_height,
262                           int dst_width,
263                           int dst_height,
264                           FilterMode f,
265                           int benchmark_iterations,
266                           int disable_cpu_flags,
267                           int benchmark_cpu_info) {
268   if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
269     return 0;
270   }
271 
272   int i, j;
273   int src_width_uv = Abs(src_width);
274   int src_height_uv = Abs(src_height);
275 
276   int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
277   int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
278 
279   int src_stride_y = Abs(src_width);
280   int src_stride_uv = src_width_uv;
281 
282   align_buffer_page_end(src_y, src_y_plane_size);
283   align_buffer_page_end(src_u, src_uv_plane_size);
284   align_buffer_page_end(src_v, src_uv_plane_size);
285   if (!src_y || !src_u || !src_v) {
286     printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
287     return 0;
288   }
289   MemRandomize(src_y, src_y_plane_size);
290   MemRandomize(src_u, src_uv_plane_size);
291   MemRandomize(src_v, src_uv_plane_size);
292 
293   int dst_width_uv = dst_width;
294   int dst_height_uv = dst_height;
295 
296   int64_t dst_y_plane_size = (dst_width) * (dst_height);
297   int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
298 
299   int dst_stride_y = dst_width;
300   int dst_stride_uv = dst_width_uv;
301 
302   align_buffer_page_end(dst_y_c, dst_y_plane_size);
303   align_buffer_page_end(dst_u_c, dst_uv_plane_size);
304   align_buffer_page_end(dst_v_c, dst_uv_plane_size);
305   align_buffer_page_end(dst_y_opt, dst_y_plane_size);
306   align_buffer_page_end(dst_u_opt, dst_uv_plane_size);
307   align_buffer_page_end(dst_v_opt, dst_uv_plane_size);
308   if (!dst_y_c || !dst_u_c || !dst_v_c || !dst_y_opt || !dst_u_opt ||
309       !dst_v_opt) {
310     printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
311     return 0;
312   }
313 
314   MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
315   double c_time = get_time();
316   I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
317             src_width, src_height, dst_y_c, dst_stride_y, dst_u_c,
318             dst_stride_uv, dst_v_c, dst_stride_uv, dst_width, dst_height, f);
319   c_time = (get_time() - c_time);
320 
321   MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
322   double opt_time = get_time();
323   for (i = 0; i < benchmark_iterations; ++i) {
324     I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
325               src_width, src_height, dst_y_opt, dst_stride_y, dst_u_opt,
326               dst_stride_uv, dst_v_opt, dst_stride_uv, dst_width, dst_height,
327               f);
328   }
329   opt_time = (get_time() - opt_time) / benchmark_iterations;
330   // Report performance of C vs OPT.
331   printf("filter %d - %8d us C - %8d us OPT\n", f,
332          static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
333 
334   // C version may be a little off from the optimized. Order of
335   //  operations may introduce rounding somewhere. So do a difference
336   //  of the buffers and look to see that the max difference is not
337   //  over 3.
338   int max_diff = 0;
339   for (i = 0; i < (dst_height); ++i) {
340     for (j = 0; j < (dst_width); ++j) {
341       int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] -
342                          dst_y_opt[(i * dst_stride_y) + j]);
343       if (abs_diff > max_diff) {
344         max_diff = abs_diff;
345       }
346     }
347   }
348 
349   for (i = 0; i < (dst_height_uv); ++i) {
350     for (j = 0; j < (dst_width_uv); ++j) {
351       int abs_diff = Abs(dst_u_c[(i * dst_stride_uv) + j] -
352                          dst_u_opt[(i * dst_stride_uv) + j]);
353       if (abs_diff > max_diff) {
354         max_diff = abs_diff;
355       }
356       abs_diff = Abs(dst_v_c[(i * dst_stride_uv) + j] -
357                      dst_v_opt[(i * dst_stride_uv) + j]);
358       if (abs_diff > max_diff) {
359         max_diff = abs_diff;
360       }
361     }
362   }
363 
364   free_aligned_buffer_page_end(dst_y_c);
365   free_aligned_buffer_page_end(dst_u_c);
366   free_aligned_buffer_page_end(dst_v_c);
367   free_aligned_buffer_page_end(dst_y_opt);
368   free_aligned_buffer_page_end(dst_u_opt);
369   free_aligned_buffer_page_end(dst_v_opt);
370   free_aligned_buffer_page_end(src_y);
371   free_aligned_buffer_page_end(src_u);
372   free_aligned_buffer_page_end(src_v);
373 
374   return max_diff;
375 }
376 
377 // Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
378 // 0 = exact.
I444TestFilter_16(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)379 static int I444TestFilter_16(int src_width,
380                              int src_height,
381                              int dst_width,
382                              int dst_height,
383                              FilterMode f,
384                              int benchmark_iterations,
385                              int disable_cpu_flags,
386                              int benchmark_cpu_info) {
387   if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
388     return 0;
389   }
390 
391   int i;
392   int src_width_uv = Abs(src_width);
393   int src_height_uv = Abs(src_height);
394 
395   int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
396   int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
397 
398   int src_stride_y = Abs(src_width);
399   int src_stride_uv = src_width_uv;
400 
401   align_buffer_page_end(src_y, src_y_plane_size);
402   align_buffer_page_end(src_u, src_uv_plane_size);
403   align_buffer_page_end(src_v, src_uv_plane_size);
404   align_buffer_page_end(src_y_16, src_y_plane_size * 2);
405   align_buffer_page_end(src_u_16, src_uv_plane_size * 2);
406   align_buffer_page_end(src_v_16, src_uv_plane_size * 2);
407   if (!src_y || !src_u || !src_v || !src_y_16 || !src_u_16 || !src_v_16) {
408     printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
409     return 0;
410   }
411   uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
412   uint16_t* p_src_u_16 = reinterpret_cast<uint16_t*>(src_u_16);
413   uint16_t* p_src_v_16 = reinterpret_cast<uint16_t*>(src_v_16);
414 
415   MemRandomize(src_y, src_y_plane_size);
416   MemRandomize(src_u, src_uv_plane_size);
417   MemRandomize(src_v, src_uv_plane_size);
418 
419   for (i = 0; i < src_y_plane_size; ++i) {
420     p_src_y_16[i] = src_y[i];
421   }
422   for (i = 0; i < src_uv_plane_size; ++i) {
423     p_src_u_16[i] = src_u[i];
424     p_src_v_16[i] = src_v[i];
425   }
426 
427   int dst_width_uv = dst_width;
428   int dst_height_uv = dst_height;
429 
430   int dst_y_plane_size = (dst_width) * (dst_height);
431   int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
432 
433   int dst_stride_y = dst_width;
434   int dst_stride_uv = dst_width_uv;
435 
436   align_buffer_page_end(dst_y_8, dst_y_plane_size);
437   align_buffer_page_end(dst_u_8, dst_uv_plane_size);
438   align_buffer_page_end(dst_v_8, dst_uv_plane_size);
439   align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
440   align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2);
441   align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2);
442 
443   uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
444   uint16_t* p_dst_u_16 = reinterpret_cast<uint16_t*>(dst_u_16);
445   uint16_t* p_dst_v_16 = reinterpret_cast<uint16_t*>(dst_v_16);
446 
447   MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
448   I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
449             src_width, src_height, dst_y_8, dst_stride_y, dst_u_8,
450             dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f);
451   MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
452   for (i = 0; i < benchmark_iterations; ++i) {
453     I444Scale_16(p_src_y_16, src_stride_y, p_src_u_16, src_stride_uv,
454                  p_src_v_16, src_stride_uv, src_width, src_height, p_dst_y_16,
455                  dst_stride_y, p_dst_u_16, dst_stride_uv, p_dst_v_16,
456                  dst_stride_uv, dst_width, dst_height, f);
457   }
458 
459   // Expect an exact match.
460   int max_diff = 0;
461   for (i = 0; i < dst_y_plane_size; ++i) {
462     int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
463     if (abs_diff > max_diff) {
464       max_diff = abs_diff;
465     }
466   }
467   for (i = 0; i < dst_uv_plane_size; ++i) {
468     int abs_diff = Abs(dst_u_8[i] - p_dst_u_16[i]);
469     if (abs_diff > max_diff) {
470       max_diff = abs_diff;
471     }
472     abs_diff = Abs(dst_v_8[i] - p_dst_v_16[i]);
473     if (abs_diff > max_diff) {
474       max_diff = abs_diff;
475     }
476   }
477 
478   free_aligned_buffer_page_end(dst_y_8);
479   free_aligned_buffer_page_end(dst_u_8);
480   free_aligned_buffer_page_end(dst_v_8);
481   free_aligned_buffer_page_end(dst_y_16);
482   free_aligned_buffer_page_end(dst_u_16);
483   free_aligned_buffer_page_end(dst_v_16);
484   free_aligned_buffer_page_end(src_y);
485   free_aligned_buffer_page_end(src_u);
486   free_aligned_buffer_page_end(src_v);
487   free_aligned_buffer_page_end(src_y_16);
488   free_aligned_buffer_page_end(src_u_16);
489   free_aligned_buffer_page_end(src_v_16);
490 
491   return max_diff;
492 }
493 
// The following adjustments in dimensions ensure the scale factor will be
// exactly achieved.
// 2 is chroma subsample.
//
// Both macros derive the same unit count ((x / nom + 1) / 2) from x:
//   DX = count * nom * 2    (destination size; |denom| is unused here)
//   SX = count * denom * 2  (source size)
// so DX / SX is exactly nom / denom and both are even.
// DX takes Abs(x); SX does not, so the sign of x carries into SX's result.
// Every parameter is parenthesized so expression arguments expand correctly.
#define DX(x, nom, denom) \
  static_cast<int>((((Abs(x)) / (nom) + 1) / 2) * (nom) * 2)
#define SX(x, nom, denom) \
  static_cast<int>((((x) / (nom) + 1) / 2) * (denom) * 2)
499 
// Defines four tests (I420, I444, I420 16 bit, I444 16 bit) that scale by
// nom/denom with kFilter<filter>.  The source size comes from SX() and the
// destination size from DX() applied to the benchmark dimensions; each test
// passes when the reported difference is at most |max_diff|.
#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
  TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter) { \
    const int from_w = SX(benchmark_width_, nom, denom); \
    const int from_h = SX(benchmark_height_, nom, denom); \
    const int to_w = DX(benchmark_width_, nom, denom); \
    const int to_h = DX(benchmark_height_, nom, denom); \
    int diff = I420TestFilter(from_w, from_h, to_w, to_h, kFilter##filter, \
                              benchmark_iterations_, disable_cpu_flags_, \
                              benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  } \
  TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter) { \
    const int from_w = SX(benchmark_width_, nom, denom); \
    const int from_h = SX(benchmark_height_, nom, denom); \
    const int to_w = DX(benchmark_width_, nom, denom); \
    const int to_h = DX(benchmark_height_, nom, denom); \
    int diff = I444TestFilter(from_w, from_h, to_w, to_h, kFilter##filter, \
                              benchmark_iterations_, disable_cpu_flags_, \
                              benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  } \
  TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter##_16) { \
    const int from_w = SX(benchmark_width_, nom, denom); \
    const int from_h = SX(benchmark_height_, nom, denom); \
    const int to_w = DX(benchmark_width_, nom, denom); \
    const int to_h = DX(benchmark_height_, nom, denom); \
    int diff = I420TestFilter_16(from_w, from_h, to_w, to_h, kFilter##filter, \
                                 benchmark_iterations_, disable_cpu_flags_, \
                                 benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  } \
  TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter##_16) { \
    const int from_w = SX(benchmark_width_, nom, denom); \
    const int from_h = SX(benchmark_height_, nom, denom); \
    const int to_w = DX(benchmark_width_, nom, denom); \
    const int to_h = DX(benchmark_height_, nom, denom); \
    int diff = I444TestFilter_16(from_w, from_h, to_w, to_h, kFilter##filter, \
                                 benchmark_iterations_, disable_cpu_flags_, \
                                 benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  }
533 
534 // Test a scale factor with all 4 filters.  Expect unfiltered to be exact, but
535 // filtering is different fixed point implementations for SSSE3, Neon and C.
536 #define TEST_FACTOR(name, nom, denom, boxdiff) \
537   TEST_FACTOR1(name, None, nom, denom, 0)      \
538   TEST_FACTOR1(name, Linear, nom, denom, 3)    \
539   TEST_FACTOR1(name, Bilinear, nom, denom, 3)  \
540   TEST_FACTOR1(name, Box, nom, denom, boxdiff)
541 
542 TEST_FACTOR(2, 1, 2, 0)
543 TEST_FACTOR(4, 1, 4, 0)
544 TEST_FACTOR(8, 1, 8, 0)
545 TEST_FACTOR(3by4, 3, 4, 1)
546 TEST_FACTOR(3by8, 3, 8, 1)
547 TEST_FACTOR(3, 1, 3, 0)
548 #undef TEST_FACTOR1
549 #undef TEST_FACTOR
550 #undef SX
551 #undef DX
552 
// Defines eight tests for one fixed size and one filter.  The "To" tests
// scale from the benchmark size to width x height; the "From" tests scale from
// width x height to Abs() of the benchmark size.  Each direction is covered
// for I420, I444 and their 16 bit variants, and passes when the reported
// difference is at most |max_diff|.
#define TEST_SCALETO1(name, width, height, filter, max_diff) \
  TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter) { \
    int diff = I420TestFilter( \
        benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
        benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  } \
  TEST_F(LibYUVScaleTest, I444##name##To##width##x##height##_##filter) { \
    int diff = I444TestFilter( \
        benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
        benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  } \
  TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter##_16) { \
    int diff = I420TestFilter_16( \
        benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
        benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  } \
  TEST_F(LibYUVScaleTest, I444##name##To##width##x##height##_##filter##_16) { \
    int diff = I444TestFilter_16( \
        benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
        benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  } \
  TEST_F(LibYUVScaleTest, I420##name##From##width##x##height##_##filter) { \
    const int to_w = Abs(benchmark_width_); \
    const int to_h = Abs(benchmark_height_); \
    int diff = I420TestFilter(width, height, to_w, to_h, kFilter##filter, \
                              benchmark_iterations_, disable_cpu_flags_, \
                              benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  } \
  TEST_F(LibYUVScaleTest, I444##name##From##width##x##height##_##filter) { \
    const int to_w = Abs(benchmark_width_); \
    const int to_h = Abs(benchmark_height_); \
    int diff = I444TestFilter(width, height, to_w, to_h, kFilter##filter, \
                              benchmark_iterations_, disable_cpu_flags_, \
                              benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  } \
  TEST_F(LibYUVScaleTest, \
         I420##name##From##width##x##height##_##filter##_16) { \
    const int to_w = Abs(benchmark_width_); \
    const int to_h = Abs(benchmark_height_); \
    int diff = I420TestFilter_16(width, height, to_w, to_h, kFilter##filter, \
                                 benchmark_iterations_, disable_cpu_flags_, \
                                 benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  } \
  TEST_F(LibYUVScaleTest, \
         I444##name##From##width##x##height##_##filter##_16) { \
    const int to_w = Abs(benchmark_width_); \
    const int to_h = Abs(benchmark_height_); \
    int diff = I444TestFilter_16(width, height, to_w, to_h, kFilter##filter, \
                                 benchmark_iterations_, disable_cpu_flags_, \
                                 benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  }
608 
609 // Test scale to a specified size with all 4 filters.
610 #define TEST_SCALETO(name, width, height)         \
611   TEST_SCALETO1(name, width, height, None, 0)     \
612   TEST_SCALETO1(name, width, height, Linear, 3)   \
613   TEST_SCALETO1(name, width, height, Bilinear, 3) \
614   TEST_SCALETO1(name, width, height, Box, 3)
615 
616 TEST_SCALETO(Scale, 1, 1)
617 TEST_SCALETO(Scale, 320, 240)
618 TEST_SCALETO(Scale, 569, 480)
619 TEST_SCALETO(Scale, 640, 360)
620 TEST_SCALETO(Scale, 1280, 720)
621 TEST_SCALETO(Scale, 1920, 1080)
622 #undef TEST_SCALETO1
623 #undef TEST_SCALETO
624 
#ifdef HAS_SCALEROWDOWN2_SSSE3
// Checks the 2x2 box down-scaling row functions on a hand-built pair of
// 128 pixel rows: the regular C and SSSE3 versions against known values, the
// Odd C version against known values, and Odd C against Odd SSSE3.
// The test body is skipped (with a warning) when SSSE3 is not detected.
TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) {
  SIMD_ALIGNED(uint8_t orig_pixels[128 * 2]);
  SIMD_ALIGNED(uint8_t dst_pixels_opt[64]);
  SIMD_ALIGNED(uint8_t dst_pixels_c[64]);
  memset(orig_pixels, 0, sizeof(orig_pixels));
  memset(dst_pixels_opt, 0, sizeof(dst_pixels_opt));
  memset(dst_pixels_c, 0, sizeof(dst_pixels_c));

  if (!TestCpuFlag(kCpuHasSSSE3)) {
    printf("Warning SSSE3 not detected; Skipping test.\n");
    return;
  }

  // The second source row starts one stride (128 bytes) after the first.
  uint8_t* const row0 = orig_pixels;
  uint8_t* const row1 = orig_pixels + 128;

  // Each of the first four 2x2 boxes has a single non-zero corner.
  // Top-left corner set.
  row0[0] = 255u;
  row0[1] = 0u;
  row1[0] = 0u;
  row1[1] = 0u;
  // Top-right corner set.
  row0[2] = 0u;
  row0[3] = 100u;
  row1[2] = 0u;
  row1[3] = 0u;
  // Bottom-left corner set.
  row0[4] = 0u;
  row0[5] = 0u;
  row1[4] = 50u;
  row1[5] = 0u;
  // Bottom-right corner set.
  row0[6] = 0u;
  row0[7] = 0u;
  row1[6] = 0u;
  row1[7] = 20u;
  // Last box: the second column is the one the Odd variant leaves out.
  row0[126] = 4u;
  row0[127] = 255u;
  row1[126] = 16u;
  row1[127] = 255u;

  // Regular half size in C.  The expected values equal (sum of box + 2) / 4.
  ScaleRowDown2Box_C(orig_pixels, 128, dst_pixels_c, 64);

  EXPECT_EQ(64u, dst_pixels_c[0]);
  EXPECT_EQ(25u, dst_pixels_c[1]);
  EXPECT_EQ(13u, dst_pixels_c[2]);
  EXPECT_EQ(5u, dst_pixels_c[3]);
  EXPECT_EQ(0u, dst_pixels_c[4]);
  EXPECT_EQ(133u, dst_pixels_c[63]);

  // Odd width version - the last output uses just 1 horizontal pixel.
  ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64);

  EXPECT_EQ(64u, dst_pixels_c[0]);
  EXPECT_EQ(25u, dst_pixels_c[1]);
  EXPECT_EQ(13u, dst_pixels_c[2]);
  EXPECT_EQ(5u, dst_pixels_c[3]);
  EXPECT_EQ(0u, dst_pixels_c[4]);
  EXPECT_EQ(10u, dst_pixels_c[63]);

  // One output pixel less: the cleared last destination byte must stay 0.
  memset(dst_pixels_c, 0, sizeof(dst_pixels_c));
  ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 63);

  EXPECT_EQ(64u, dst_pixels_c[0]);
  EXPECT_EQ(25u, dst_pixels_c[1]);
  EXPECT_EQ(13u, dst_pixels_c[2]);
  EXPECT_EQ(5u, dst_pixels_c[3]);
  EXPECT_EQ(0u, dst_pixels_c[4]);
  EXPECT_EQ(0u, dst_pixels_c[63]);

  // Regular half size with SSSE3 gives the same values as regular C.
  ScaleRowDown2Box_SSSE3(orig_pixels, 128, dst_pixels_opt, 64);

  EXPECT_EQ(64u, dst_pixels_opt[0]);
  EXPECT_EQ(25u, dst_pixels_opt[1]);
  EXPECT_EQ(13u, dst_pixels_opt[2]);
  EXPECT_EQ(5u, dst_pixels_opt[3]);
  EXPECT_EQ(0u, dst_pixels_opt[4]);
  EXPECT_EQ(133u, dst_pixels_opt[63]);

  // The Odd C and Odd SSSE3 versions must agree on every output pixel.
  ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64);
  ScaleRowDown2Box_Odd_SSSE3(orig_pixels, 128, dst_pixels_opt, 64);
  for (int i = 0; i < 64; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }
}
#endif  // HAS_SCALEROWDOWN2_SSSE3
714 
715 extern "C" void ScaleRowUp2_16_NEON(const uint16_t* src_ptr,
716                                     ptrdiff_t src_stride,
717                                     uint16_t* dst,
718                                     int dst_width);
719 extern "C" void ScaleRowUp2_16_MMI(const uint16_t* src_ptr,
720                                    ptrdiff_t src_stride,
721                                    uint16_t* dst,
722                                    int dst_width);
723 extern "C" void ScaleRowUp2_16_C(const uint16_t* src_ptr,
724                                  ptrdiff_t src_stride,
725                                  uint16_t* dst,
726                                  int dst_width);
727 
TEST_F(LibYUVScaleTest,TestScaleRowUp2_16)728 TEST_F(LibYUVScaleTest, TestScaleRowUp2_16) {
729   SIMD_ALIGNED(uint16_t orig_pixels[640 * 2 + 1]);  // 2 rows + 1 pixel overrun.
730   SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]);
731   SIMD_ALIGNED(uint16_t dst_pixels_c[1280]);
732 
733   memset(orig_pixels, 0, sizeof(orig_pixels));
734   memset(dst_pixels_opt, 1, sizeof(dst_pixels_opt));
735   memset(dst_pixels_c, 2, sizeof(dst_pixels_c));
736 
737   for (int i = 0; i < 640 * 2 + 1; ++i) {
738     orig_pixels[i] = i;
739   }
740   ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_c[0], 1280);
741   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
742 #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
743     int has_neon = TestCpuFlag(kCpuHasNEON);
744     if (has_neon) {
745       ScaleRowUp2_16_NEON(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
746     } else {
747       ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
748     }
749 #elif !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
750     int has_mmi = TestCpuFlag(kCpuHasMMI);
751     if (has_mmi) {
752       ScaleRowUp2_16_MMI(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
753     } else {
754       ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
755     }
756 #else
757     ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
758 #endif
759   }
760 
761   for (int i = 0; i < 1280; ++i) {
762     EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
763   }
764   EXPECT_EQ(dst_pixels_c[0], (0 * 9 + 1 * 3 + 640 * 3 + 641 * 1 + 8) / 16);
765   EXPECT_EQ(dst_pixels_c[1279], 800);
766 }
767 
768 extern "C" void ScaleRowDown2Box_16_NEON(const uint16_t* src_ptr,
769                                          ptrdiff_t src_stride,
770                                          uint16_t* dst,
771                                          int dst_width);
772 
TEST_F(LibYUVScaleTest,TestScaleRowDown2Box_16)773 TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) {
774   SIMD_ALIGNED(uint16_t orig_pixels[2560 * 2]);
775   SIMD_ALIGNED(uint16_t dst_pixels_c[1280]);
776   SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]);
777 
778   memset(orig_pixels, 0, sizeof(orig_pixels));
779   memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
780   memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));
781 
782   for (int i = 0; i < 2560 * 2; ++i) {
783     orig_pixels[i] = i;
784   }
785   ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_c[0], 1280);
786   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
787 #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
788     int has_neon = TestCpuFlag(kCpuHasNEON);
789     if (has_neon) {
790       ScaleRowDown2Box_16_NEON(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
791     } else {
792       ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
793     }
794 #else
795     ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
796 #endif
797   }
798 
799   for (int i = 0; i < 1280; ++i) {
800     EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
801   }
802 
803   EXPECT_EQ(dst_pixels_c[0], (0 + 1 + 2560 + 2561 + 2) / 4);
804   EXPECT_EQ(dst_pixels_c[1279], 3839);
805 }
806 
807 // Test scaling plane with 8 bit C vs 16 bit C and return maximum pixel
808 // difference.
809 // 0 = exact.
TestPlaneFilter_16(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)810 static int TestPlaneFilter_16(int src_width,
811                               int src_height,
812                               int dst_width,
813                               int dst_height,
814                               FilterMode f,
815                               int benchmark_iterations,
816                               int disable_cpu_flags,
817                               int benchmark_cpu_info) {
818   if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
819     return 0;
820   }
821 
822   int i;
823   int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
824   int src_stride_y = Abs(src_width);
825   int dst_y_plane_size = dst_width * dst_height;
826   int dst_stride_y = dst_width;
827 
828   align_buffer_page_end(src_y, src_y_plane_size);
829   align_buffer_page_end(src_y_16, src_y_plane_size * 2);
830   align_buffer_page_end(dst_y_8, dst_y_plane_size);
831   align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
832   uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
833   uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
834 
835   MemRandomize(src_y, src_y_plane_size);
836   memset(dst_y_8, 0, dst_y_plane_size);
837   memset(dst_y_16, 1, dst_y_plane_size * 2);
838 
839   for (i = 0; i < src_y_plane_size; ++i) {
840     p_src_y_16[i] = src_y[i] & 255;
841   }
842 
843   MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
844   ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y_8, dst_stride_y,
845              dst_width, dst_height, f);
846   MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
847 
848   for (i = 0; i < benchmark_iterations; ++i) {
849     ScalePlane_16(p_src_y_16, src_stride_y, src_width, src_height, p_dst_y_16,
850                   dst_stride_y, dst_width, dst_height, f);
851   }
852 
853   // Expect an exact match.
854   int max_diff = 0;
855   for (i = 0; i < dst_y_plane_size; ++i) {
856     int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
857     if (abs_diff > max_diff) {
858       max_diff = abs_diff;
859     }
860   }
861 
862   free_aligned_buffer_page_end(dst_y_8);
863   free_aligned_buffer_page_end(dst_y_16);
864   free_aligned_buffer_page_end(src_y);
865   free_aligned_buffer_page_end(src_y_16);
866 
867   return max_diff;
868 }
869 
870 // The following adjustments in dimensions ensure the scale factor will be
871 // exactly achieved.
872 // 2 is chroma subsample.
873 #define DX(x, nom, denom) static_cast<int>(((Abs(x) / nom + 1) / 2) * nom * 2)
874 #define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2)
875 
876 #define TEST_FACTOR1(name, filter, nom, denom, max_diff)                     \
877   TEST_F(LibYUVScaleTest, ScalePlaneDownBy##name##_##filter##_16) {          \
878     int diff = TestPlaneFilter_16(                                           \
879         SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
880         DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
881         kFilter##filter, benchmark_iterations_, disable_cpu_flags_,          \
882         benchmark_cpu_info_);                                                \
883     EXPECT_LE(diff, max_diff);                                               \
884   }
885 
886 // Test a scale factor with all 4 filters.  Expect unfiltered to be exact, but
887 // filtering is different fixed point implementations for SSSE3, Neon and C.
888 #define TEST_FACTOR(name, nom, denom, boxdiff)      \
889   TEST_FACTOR1(name, None, nom, denom, 0)           \
890   TEST_FACTOR1(name, Linear, nom, denom, boxdiff)   \
891   TEST_FACTOR1(name, Bilinear, nom, denom, boxdiff) \
892   TEST_FACTOR1(name, Box, nom, denom, boxdiff)
893 
894 TEST_FACTOR(2, 1, 2, 0)
895 TEST_FACTOR(4, 1, 4, 0)
896 TEST_FACTOR(8, 1, 8, 0)
897 TEST_FACTOR(3by4, 3, 4, 1)
898 TEST_FACTOR(3by8, 3, 8, 1)
899 TEST_FACTOR(3, 1, 3, 0)
900 #undef TEST_FACTOR1
901 #undef TEST_FACTOR
902 #undef SX
903 #undef DX
904 }  // namespace libyuv
905