1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <stdlib.h>
12 #include <time.h>
13
14 #include "../unit_test/unit_test.h"
15 #include "libyuv/cpu_id.h"
16 #include "libyuv/scale.h"
17 #include "libyuv/scale_row.h" // For ScaleRowDown2Box_Odd_C
18
// STRINGIZE turns its argument into a string literal.
#define STRINGIZE(token) #token
// FILELINESTR builds the literal "<file>:<line>". Routing the line through
// STRINGIZE lets an argument such as __LINE__ expand to its number before it is
// stringized.
#define FILELINESTR(path, lineno) path ":" STRINGIZE(lineno)
21
22 namespace libyuv {
23
24 // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
I420TestFilter(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)25 static int I420TestFilter(int src_width,
26 int src_height,
27 int dst_width,
28 int dst_height,
29 FilterMode f,
30 int benchmark_iterations,
31 int disable_cpu_flags,
32 int benchmark_cpu_info) {
33 if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
34 return 0;
35 }
36
37 int i, j;
38 int src_width_uv = (Abs(src_width) + 1) >> 1;
39 int src_height_uv = (Abs(src_height) + 1) >> 1;
40
41 int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
42 int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
43
44 int src_stride_y = Abs(src_width);
45 int src_stride_uv = src_width_uv;
46
47 align_buffer_page_end(src_y, src_y_plane_size);
48 align_buffer_page_end(src_u, src_uv_plane_size);
49 align_buffer_page_end(src_v, src_uv_plane_size);
50 if (!src_y || !src_u || !src_v) {
51 printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
52 return 0;
53 }
54 MemRandomize(src_y, src_y_plane_size);
55 MemRandomize(src_u, src_uv_plane_size);
56 MemRandomize(src_v, src_uv_plane_size);
57
58 int dst_width_uv = (dst_width + 1) >> 1;
59 int dst_height_uv = (dst_height + 1) >> 1;
60
61 int64_t dst_y_plane_size = (dst_width) * (dst_height);
62 int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
63
64 int dst_stride_y = dst_width;
65 int dst_stride_uv = dst_width_uv;
66
67 align_buffer_page_end(dst_y_c, dst_y_plane_size);
68 align_buffer_page_end(dst_u_c, dst_uv_plane_size);
69 align_buffer_page_end(dst_v_c, dst_uv_plane_size);
70 align_buffer_page_end(dst_y_opt, dst_y_plane_size);
71 align_buffer_page_end(dst_u_opt, dst_uv_plane_size);
72 align_buffer_page_end(dst_v_opt, dst_uv_plane_size);
73 if (!dst_y_c || !dst_u_c || !dst_v_c || !dst_y_opt || !dst_u_opt ||
74 !dst_v_opt) {
75 printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
76 return 0;
77 }
78
79 MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
80 double c_time = get_time();
81 I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
82 src_width, src_height, dst_y_c, dst_stride_y, dst_u_c,
83 dst_stride_uv, dst_v_c, dst_stride_uv, dst_width, dst_height, f);
84 c_time = (get_time() - c_time);
85
86 MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
87 double opt_time = get_time();
88 for (i = 0; i < benchmark_iterations; ++i) {
89 I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
90 src_width, src_height, dst_y_opt, dst_stride_y, dst_u_opt,
91 dst_stride_uv, dst_v_opt, dst_stride_uv, dst_width, dst_height,
92 f);
93 }
94 opt_time = (get_time() - opt_time) / benchmark_iterations;
95 // Report performance of C vs OPT.
96 printf("filter %d - %8d us C - %8d us OPT\n", f,
97 static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
98
99 // C version may be a little off from the optimized. Order of
100 // operations may introduce rounding somewhere. So do a difference
101 // of the buffers and look to see that the max difference is not
102 // over 3.
103 int max_diff = 0;
104 for (i = 0; i < (dst_height); ++i) {
105 for (j = 0; j < (dst_width); ++j) {
106 int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] -
107 dst_y_opt[(i * dst_stride_y) + j]);
108 if (abs_diff > max_diff) {
109 max_diff = abs_diff;
110 }
111 }
112 }
113
114 for (i = 0; i < (dst_height_uv); ++i) {
115 for (j = 0; j < (dst_width_uv); ++j) {
116 int abs_diff = Abs(dst_u_c[(i * dst_stride_uv) + j] -
117 dst_u_opt[(i * dst_stride_uv) + j]);
118 if (abs_diff > max_diff) {
119 max_diff = abs_diff;
120 }
121 abs_diff = Abs(dst_v_c[(i * dst_stride_uv) + j] -
122 dst_v_opt[(i * dst_stride_uv) + j]);
123 if (abs_diff > max_diff) {
124 max_diff = abs_diff;
125 }
126 }
127 }
128
129 free_aligned_buffer_page_end(dst_y_c);
130 free_aligned_buffer_page_end(dst_u_c);
131 free_aligned_buffer_page_end(dst_v_c);
132 free_aligned_buffer_page_end(dst_y_opt);
133 free_aligned_buffer_page_end(dst_u_opt);
134 free_aligned_buffer_page_end(dst_v_opt);
135 free_aligned_buffer_page_end(src_y);
136 free_aligned_buffer_page_end(src_u);
137 free_aligned_buffer_page_end(src_v);
138
139 return max_diff;
140 }
141
142 // Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
143 // 0 = exact.
I420TestFilter_16(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)144 static int I420TestFilter_16(int src_width,
145 int src_height,
146 int dst_width,
147 int dst_height,
148 FilterMode f,
149 int benchmark_iterations,
150 int disable_cpu_flags,
151 int benchmark_cpu_info) {
152 if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
153 return 0;
154 }
155
156 int i;
157 int src_width_uv = (Abs(src_width) + 1) >> 1;
158 int src_height_uv = (Abs(src_height) + 1) >> 1;
159
160 int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
161 int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
162
163 int src_stride_y = Abs(src_width);
164 int src_stride_uv = src_width_uv;
165
166 align_buffer_page_end(src_y, src_y_plane_size);
167 align_buffer_page_end(src_u, src_uv_plane_size);
168 align_buffer_page_end(src_v, src_uv_plane_size);
169 align_buffer_page_end(src_y_16, src_y_plane_size * 2);
170 align_buffer_page_end(src_u_16, src_uv_plane_size * 2);
171 align_buffer_page_end(src_v_16, src_uv_plane_size * 2);
172 if (!src_y || !src_u || !src_v || !src_y_16 || !src_u_16 || !src_v_16) {
173 printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
174 return 0;
175 }
176 uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
177 uint16_t* p_src_u_16 = reinterpret_cast<uint16_t*>(src_u_16);
178 uint16_t* p_src_v_16 = reinterpret_cast<uint16_t*>(src_v_16);
179
180 MemRandomize(src_y, src_y_plane_size);
181 MemRandomize(src_u, src_uv_plane_size);
182 MemRandomize(src_v, src_uv_plane_size);
183
184 for (i = 0; i < src_y_plane_size; ++i) {
185 p_src_y_16[i] = src_y[i];
186 }
187 for (i = 0; i < src_uv_plane_size; ++i) {
188 p_src_u_16[i] = src_u[i];
189 p_src_v_16[i] = src_v[i];
190 }
191
192 int dst_width_uv = (dst_width + 1) >> 1;
193 int dst_height_uv = (dst_height + 1) >> 1;
194
195 int dst_y_plane_size = (dst_width) * (dst_height);
196 int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
197
198 int dst_stride_y = dst_width;
199 int dst_stride_uv = dst_width_uv;
200
201 align_buffer_page_end(dst_y_8, dst_y_plane_size);
202 align_buffer_page_end(dst_u_8, dst_uv_plane_size);
203 align_buffer_page_end(dst_v_8, dst_uv_plane_size);
204 align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
205 align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2);
206 align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2);
207
208 uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
209 uint16_t* p_dst_u_16 = reinterpret_cast<uint16_t*>(dst_u_16);
210 uint16_t* p_dst_v_16 = reinterpret_cast<uint16_t*>(dst_v_16);
211
212 MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
213 I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
214 src_width, src_height, dst_y_8, dst_stride_y, dst_u_8,
215 dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f);
216 MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
217 for (i = 0; i < benchmark_iterations; ++i) {
218 I420Scale_16(p_src_y_16, src_stride_y, p_src_u_16, src_stride_uv,
219 p_src_v_16, src_stride_uv, src_width, src_height, p_dst_y_16,
220 dst_stride_y, p_dst_u_16, dst_stride_uv, p_dst_v_16,
221 dst_stride_uv, dst_width, dst_height, f);
222 }
223
224 // Expect an exact match.
225 int max_diff = 0;
226 for (i = 0; i < dst_y_plane_size; ++i) {
227 int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
228 if (abs_diff > max_diff) {
229 max_diff = abs_diff;
230 }
231 }
232 for (i = 0; i < dst_uv_plane_size; ++i) {
233 int abs_diff = Abs(dst_u_8[i] - p_dst_u_16[i]);
234 if (abs_diff > max_diff) {
235 max_diff = abs_diff;
236 }
237 abs_diff = Abs(dst_v_8[i] - p_dst_v_16[i]);
238 if (abs_diff > max_diff) {
239 max_diff = abs_diff;
240 }
241 }
242
243 free_aligned_buffer_page_end(dst_y_8);
244 free_aligned_buffer_page_end(dst_u_8);
245 free_aligned_buffer_page_end(dst_v_8);
246 free_aligned_buffer_page_end(dst_y_16);
247 free_aligned_buffer_page_end(dst_u_16);
248 free_aligned_buffer_page_end(dst_v_16);
249 free_aligned_buffer_page_end(src_y);
250 free_aligned_buffer_page_end(src_u);
251 free_aligned_buffer_page_end(src_v);
252 free_aligned_buffer_page_end(src_y_16);
253 free_aligned_buffer_page_end(src_u_16);
254 free_aligned_buffer_page_end(src_v_16);
255
256 return max_diff;
257 }
258
259 // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
I444TestFilter(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)260 static int I444TestFilter(int src_width,
261 int src_height,
262 int dst_width,
263 int dst_height,
264 FilterMode f,
265 int benchmark_iterations,
266 int disable_cpu_flags,
267 int benchmark_cpu_info) {
268 if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
269 return 0;
270 }
271
272 int i, j;
273 int src_width_uv = Abs(src_width);
274 int src_height_uv = Abs(src_height);
275
276 int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
277 int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
278
279 int src_stride_y = Abs(src_width);
280 int src_stride_uv = src_width_uv;
281
282 align_buffer_page_end(src_y, src_y_plane_size);
283 align_buffer_page_end(src_u, src_uv_plane_size);
284 align_buffer_page_end(src_v, src_uv_plane_size);
285 if (!src_y || !src_u || !src_v) {
286 printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
287 return 0;
288 }
289 MemRandomize(src_y, src_y_plane_size);
290 MemRandomize(src_u, src_uv_plane_size);
291 MemRandomize(src_v, src_uv_plane_size);
292
293 int dst_width_uv = dst_width;
294 int dst_height_uv = dst_height;
295
296 int64_t dst_y_plane_size = (dst_width) * (dst_height);
297 int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
298
299 int dst_stride_y = dst_width;
300 int dst_stride_uv = dst_width_uv;
301
302 align_buffer_page_end(dst_y_c, dst_y_plane_size);
303 align_buffer_page_end(dst_u_c, dst_uv_plane_size);
304 align_buffer_page_end(dst_v_c, dst_uv_plane_size);
305 align_buffer_page_end(dst_y_opt, dst_y_plane_size);
306 align_buffer_page_end(dst_u_opt, dst_uv_plane_size);
307 align_buffer_page_end(dst_v_opt, dst_uv_plane_size);
308 if (!dst_y_c || !dst_u_c || !dst_v_c || !dst_y_opt || !dst_u_opt ||
309 !dst_v_opt) {
310 printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
311 return 0;
312 }
313
314 MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
315 double c_time = get_time();
316 I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
317 src_width, src_height, dst_y_c, dst_stride_y, dst_u_c,
318 dst_stride_uv, dst_v_c, dst_stride_uv, dst_width, dst_height, f);
319 c_time = (get_time() - c_time);
320
321 MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
322 double opt_time = get_time();
323 for (i = 0; i < benchmark_iterations; ++i) {
324 I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
325 src_width, src_height, dst_y_opt, dst_stride_y, dst_u_opt,
326 dst_stride_uv, dst_v_opt, dst_stride_uv, dst_width, dst_height,
327 f);
328 }
329 opt_time = (get_time() - opt_time) / benchmark_iterations;
330 // Report performance of C vs OPT.
331 printf("filter %d - %8d us C - %8d us OPT\n", f,
332 static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
333
334 // C version may be a little off from the optimized. Order of
335 // operations may introduce rounding somewhere. So do a difference
336 // of the buffers and look to see that the max difference is not
337 // over 3.
338 int max_diff = 0;
339 for (i = 0; i < (dst_height); ++i) {
340 for (j = 0; j < (dst_width); ++j) {
341 int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] -
342 dst_y_opt[(i * dst_stride_y) + j]);
343 if (abs_diff > max_diff) {
344 max_diff = abs_diff;
345 }
346 }
347 }
348
349 for (i = 0; i < (dst_height_uv); ++i) {
350 for (j = 0; j < (dst_width_uv); ++j) {
351 int abs_diff = Abs(dst_u_c[(i * dst_stride_uv) + j] -
352 dst_u_opt[(i * dst_stride_uv) + j]);
353 if (abs_diff > max_diff) {
354 max_diff = abs_diff;
355 }
356 abs_diff = Abs(dst_v_c[(i * dst_stride_uv) + j] -
357 dst_v_opt[(i * dst_stride_uv) + j]);
358 if (abs_diff > max_diff) {
359 max_diff = abs_diff;
360 }
361 }
362 }
363
364 free_aligned_buffer_page_end(dst_y_c);
365 free_aligned_buffer_page_end(dst_u_c);
366 free_aligned_buffer_page_end(dst_v_c);
367 free_aligned_buffer_page_end(dst_y_opt);
368 free_aligned_buffer_page_end(dst_u_opt);
369 free_aligned_buffer_page_end(dst_v_opt);
370 free_aligned_buffer_page_end(src_y);
371 free_aligned_buffer_page_end(src_u);
372 free_aligned_buffer_page_end(src_v);
373
374 return max_diff;
375 }
376
377 // Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
378 // 0 = exact.
I444TestFilter_16(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)379 static int I444TestFilter_16(int src_width,
380 int src_height,
381 int dst_width,
382 int dst_height,
383 FilterMode f,
384 int benchmark_iterations,
385 int disable_cpu_flags,
386 int benchmark_cpu_info) {
387 if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
388 return 0;
389 }
390
391 int i;
392 int src_width_uv = Abs(src_width);
393 int src_height_uv = Abs(src_height);
394
395 int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
396 int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
397
398 int src_stride_y = Abs(src_width);
399 int src_stride_uv = src_width_uv;
400
401 align_buffer_page_end(src_y, src_y_plane_size);
402 align_buffer_page_end(src_u, src_uv_plane_size);
403 align_buffer_page_end(src_v, src_uv_plane_size);
404 align_buffer_page_end(src_y_16, src_y_plane_size * 2);
405 align_buffer_page_end(src_u_16, src_uv_plane_size * 2);
406 align_buffer_page_end(src_v_16, src_uv_plane_size * 2);
407 if (!src_y || !src_u || !src_v || !src_y_16 || !src_u_16 || !src_v_16) {
408 printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
409 return 0;
410 }
411 uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
412 uint16_t* p_src_u_16 = reinterpret_cast<uint16_t*>(src_u_16);
413 uint16_t* p_src_v_16 = reinterpret_cast<uint16_t*>(src_v_16);
414
415 MemRandomize(src_y, src_y_plane_size);
416 MemRandomize(src_u, src_uv_plane_size);
417 MemRandomize(src_v, src_uv_plane_size);
418
419 for (i = 0; i < src_y_plane_size; ++i) {
420 p_src_y_16[i] = src_y[i];
421 }
422 for (i = 0; i < src_uv_plane_size; ++i) {
423 p_src_u_16[i] = src_u[i];
424 p_src_v_16[i] = src_v[i];
425 }
426
427 int dst_width_uv = dst_width;
428 int dst_height_uv = dst_height;
429
430 int dst_y_plane_size = (dst_width) * (dst_height);
431 int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
432
433 int dst_stride_y = dst_width;
434 int dst_stride_uv = dst_width_uv;
435
436 align_buffer_page_end(dst_y_8, dst_y_plane_size);
437 align_buffer_page_end(dst_u_8, dst_uv_plane_size);
438 align_buffer_page_end(dst_v_8, dst_uv_plane_size);
439 align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
440 align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2);
441 align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2);
442
443 uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
444 uint16_t* p_dst_u_16 = reinterpret_cast<uint16_t*>(dst_u_16);
445 uint16_t* p_dst_v_16 = reinterpret_cast<uint16_t*>(dst_v_16);
446
447 MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
448 I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
449 src_width, src_height, dst_y_8, dst_stride_y, dst_u_8,
450 dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f);
451 MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
452 for (i = 0; i < benchmark_iterations; ++i) {
453 I444Scale_16(p_src_y_16, src_stride_y, p_src_u_16, src_stride_uv,
454 p_src_v_16, src_stride_uv, src_width, src_height, p_dst_y_16,
455 dst_stride_y, p_dst_u_16, dst_stride_uv, p_dst_v_16,
456 dst_stride_uv, dst_width, dst_height, f);
457 }
458
459 // Expect an exact match.
460 int max_diff = 0;
461 for (i = 0; i < dst_y_plane_size; ++i) {
462 int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
463 if (abs_diff > max_diff) {
464 max_diff = abs_diff;
465 }
466 }
467 for (i = 0; i < dst_uv_plane_size; ++i) {
468 int abs_diff = Abs(dst_u_8[i] - p_dst_u_16[i]);
469 if (abs_diff > max_diff) {
470 max_diff = abs_diff;
471 }
472 abs_diff = Abs(dst_v_8[i] - p_dst_v_16[i]);
473 if (abs_diff > max_diff) {
474 max_diff = abs_diff;
475 }
476 }
477
478 free_aligned_buffer_page_end(dst_y_8);
479 free_aligned_buffer_page_end(dst_u_8);
480 free_aligned_buffer_page_end(dst_v_8);
481 free_aligned_buffer_page_end(dst_y_16);
482 free_aligned_buffer_page_end(dst_u_16);
483 free_aligned_buffer_page_end(dst_v_16);
484 free_aligned_buffer_page_end(src_y);
485 free_aligned_buffer_page_end(src_u);
486 free_aligned_buffer_page_end(src_v);
487 free_aligned_buffer_page_end(src_y_16);
488 free_aligned_buffer_page_end(src_u_16);
489 free_aligned_buffer_page_end(src_v_16);
490
491 return max_diff;
492 }
493
// The following adjustments in dimensions ensure the scale factor will be
// exactly achieved.
// 2 is chroma subsample.
// With k = (x / nom + 1) / 2, DX yields the destination size 2 * k * nom and
// SX yields the source size 2 * k * denom, so for non-negative x the ratio
// SX : DX is exactly denom : nom. DX takes Abs(x); SX uses x as given. DX
// accepts denom only to keep the two signatures parallel.
// Every parameter is parenthesized so that expression arguments (for example
// "1 + 1") expand with the intended precedence.
#define DX(x, nom, denom) \
  static_cast<int>(((Abs(x) / (nom) + 1) / 2) * (nom) * 2)
#define SX(x, nom, denom) \
  static_cast<int>((((x) / (nom) + 1) / 2) * (denom) * 2)
499
// Defines four scale-down tests for one factor/filter pair: I420 and I444,
// each through the C vs Opt helper and through the 8 bit vs 16 bit helper.
// Source dimensions come from SX() and destination dimensions from DX(), both
// applied to the fixture's benchmark width and height. Each test passes when
// the helper's maximum pixel difference is <= tolerance.
#define TEST_FACTOR1(label, mode, num, den, tolerance) \
  TEST_F(LibYUVScaleTest, I420ScaleDownBy##label##_##mode) { \
    const int diff = I420TestFilter( \
        SX(benchmark_width_, num, den), SX(benchmark_height_, num, den), \
        DX(benchmark_width_, num, den), DX(benchmark_height_, num, den), \
        kFilter##mode, benchmark_iterations_, disable_cpu_flags_, \
        benchmark_cpu_info_); \
    EXPECT_LE(diff, tolerance); \
  } \
  TEST_F(LibYUVScaleTest, I444ScaleDownBy##label##_##mode) { \
    const int diff = I444TestFilter( \
        SX(benchmark_width_, num, den), SX(benchmark_height_, num, den), \
        DX(benchmark_width_, num, den), DX(benchmark_height_, num, den), \
        kFilter##mode, benchmark_iterations_, disable_cpu_flags_, \
        benchmark_cpu_info_); \
    EXPECT_LE(diff, tolerance); \
  } \
  TEST_F(LibYUVScaleTest, I420ScaleDownBy##label##_##mode##_16) { \
    const int diff = I420TestFilter_16( \
        SX(benchmark_width_, num, den), SX(benchmark_height_, num, den), \
        DX(benchmark_width_, num, den), DX(benchmark_height_, num, den), \
        kFilter##mode, benchmark_iterations_, disable_cpu_flags_, \
        benchmark_cpu_info_); \
    EXPECT_LE(diff, tolerance); \
  } \
  TEST_F(LibYUVScaleTest, I444ScaleDownBy##label##_##mode##_16) { \
    const int diff = I444TestFilter_16( \
        SX(benchmark_width_, num, den), SX(benchmark_height_, num, den), \
        DX(benchmark_width_, num, den), DX(benchmark_height_, num, den), \
        kFilter##mode, benchmark_iterations_, disable_cpu_flags_, \
        benchmark_cpu_info_); \
    EXPECT_LE(diff, tolerance); \
  }
533
534 // Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
535 // filtering is different fixed point implementations for SSSE3, Neon and C.
536 #define TEST_FACTOR(name, nom, denom, boxdiff) \
537 TEST_FACTOR1(name, None, nom, denom, 0) \
538 TEST_FACTOR1(name, Linear, nom, denom, 3) \
539 TEST_FACTOR1(name, Bilinear, nom, denom, 3) \
540 TEST_FACTOR1(name, Box, nom, denom, boxdiff)
541
542 TEST_FACTOR(2, 1, 2, 0)
543 TEST_FACTOR(4, 1, 4, 0)
544 TEST_FACTOR(8, 1, 8, 0)
545 TEST_FACTOR(3by4, 3, 4, 1)
546 TEST_FACTOR(3by8, 3, 8, 1)
547 TEST_FACTOR(3, 1, 3, 0)
548 #undef TEST_FACTOR1
549 #undef TEST_FACTOR
550 #undef SX
551 #undef DX
552
// Defines eight tests for one fixed size/filter pair. The four "To" tests
// scale from the fixture's benchmark size to w x h; the four "From" tests
// scale from w x h to the absolute benchmark size. Each group covers I420 and
// I444 through both the C vs Opt helper and the 8 bit vs 16 bit helper. Each
// test passes when the helper's maximum pixel difference is <= tolerance.
#define TEST_SCALETO1(label, w, h, mode, tolerance) \
  TEST_F(LibYUVScaleTest, I420##label##To##w##x##h##_##mode) { \
    const int diff = I420TestFilter( \
        benchmark_width_, benchmark_height_, w, h, kFilter##mode, \
        benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
    EXPECT_LE(diff, tolerance); \
  } \
  TEST_F(LibYUVScaleTest, I444##label##To##w##x##h##_##mode) { \
    const int diff = I444TestFilter( \
        benchmark_width_, benchmark_height_, w, h, kFilter##mode, \
        benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
    EXPECT_LE(diff, tolerance); \
  } \
  TEST_F(LibYUVScaleTest, I420##label##To##w##x##h##_##mode##_16) { \
    const int diff = I420TestFilter_16( \
        benchmark_width_, benchmark_height_, w, h, kFilter##mode, \
        benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
    EXPECT_LE(diff, tolerance); \
  } \
  TEST_F(LibYUVScaleTest, I444##label##To##w##x##h##_##mode##_16) { \
    const int diff = I444TestFilter_16( \
        benchmark_width_, benchmark_height_, w, h, kFilter##mode, \
        benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
    EXPECT_LE(diff, tolerance); \
  } \
  TEST_F(LibYUVScaleTest, I420##label##From##w##x##h##_##mode) { \
    const int diff = I420TestFilter( \
        w, h, Abs(benchmark_width_), Abs(benchmark_height_), kFilter##mode, \
        benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
    EXPECT_LE(diff, tolerance); \
  } \
  TEST_F(LibYUVScaleTest, I444##label##From##w##x##h##_##mode) { \
    const int diff = I444TestFilter( \
        w, h, Abs(benchmark_width_), Abs(benchmark_height_), kFilter##mode, \
        benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
    EXPECT_LE(diff, tolerance); \
  } \
  TEST_F(LibYUVScaleTest, I420##label##From##w##x##h##_##mode##_16) { \
    const int diff = I420TestFilter_16( \
        w, h, Abs(benchmark_width_), Abs(benchmark_height_), kFilter##mode, \
        benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
    EXPECT_LE(diff, tolerance); \
  } \
  TEST_F(LibYUVScaleTest, I444##label##From##w##x##h##_##mode##_16) { \
    const int diff = I444TestFilter_16( \
        w, h, Abs(benchmark_width_), Abs(benchmark_height_), kFilter##mode, \
        benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
    EXPECT_LE(diff, tolerance); \
  }
608
609 // Test scale to a specified size with all 4 filters.
610 #define TEST_SCALETO(name, width, height) \
611 TEST_SCALETO1(name, width, height, None, 0) \
612 TEST_SCALETO1(name, width, height, Linear, 3) \
613 TEST_SCALETO1(name, width, height, Bilinear, 3) \
614 TEST_SCALETO1(name, width, height, Box, 3)
615
616 TEST_SCALETO(Scale, 1, 1)
617 TEST_SCALETO(Scale, 320, 240)
618 TEST_SCALETO(Scale, 569, 480)
619 TEST_SCALETO(Scale, 640, 360)
620 TEST_SCALETO(Scale, 1280, 720)
621 TEST_SCALETO(Scale, 1920, 1080)
622 #undef TEST_SCALETO1
623 #undef TEST_SCALETO
624
#ifdef HAS_SCALEROWDOWN2_SSSE3
// Runs the 2x box downscale row functions over two 128 pixel rows that are
// zero apart from a few hand-placed values. Checks ScaleRowDown2Box_C and
// ScaleRowDown2Box_Odd_C against expected outputs (including a 63 pixel run
// that must leave the last destination pixel untouched), then
// ScaleRowDown2Box_SSSE3 against the same expectations, and finally that
// ScaleRowDown2Box_Odd_SSSE3 matches ScaleRowDown2Box_Odd_C pixel for pixel.
// Prints a warning and checks nothing when SSSE3 is not detected.
TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) {
  const int kRowWidth = 128;  // Source row width, also used as the stride.
  const int kHalfWidth = 64;  // Destination row width.

  SIMD_ALIGNED(uint8_t orig_pixels[128 * 2]);
  SIMD_ALIGNED(uint8_t dst_pixels_opt[64]);
  SIMD_ALIGNED(uint8_t dst_pixels_c[64]);
  memset(orig_pixels, 0, sizeof(orig_pixels));
  memset(dst_pixels_opt, 0, sizeof(dst_pixels_opt));
  memset(dst_pixels_c, 0, sizeof(dst_pixels_c));

  if (!TestCpuFlag(kCpuHasSSSE3)) {
    printf("Warning SSSE3 not detected; Skipping test.\n");
    return;
  }

  uint8_t* const top = orig_pixels;
  uint8_t* const bottom = orig_pixels + kRowWidth;

  // TL.
  top[0] = 255u;
  top[1] = 0u;
  bottom[0] = 0u;
  bottom[1] = 0u;
  // TR.
  top[2] = 0u;
  top[3] = 100u;
  bottom[2] = 0u;
  bottom[3] = 0u;
  // BL.
  top[4] = 0u;
  top[5] = 0u;
  bottom[4] = 50u;
  bottom[5] = 0u;
  // BR.
  top[6] = 0u;
  top[7] = 0u;
  bottom[6] = 0u;
  bottom[7] = 20u;
  // Odd.
  top[126] = 4u;
  top[127] = 255u;
  bottom[126] = 16u;
  bottom[127] = 255u;

  // Test regular half size.
  ScaleRowDown2Box_C(orig_pixels, kRowWidth, dst_pixels_c, kHalfWidth);

  EXPECT_EQ(64u, dst_pixels_c[0]);
  EXPECT_EQ(25u, dst_pixels_c[1]);
  EXPECT_EQ(13u, dst_pixels_c[2]);
  EXPECT_EQ(5u, dst_pixels_c[3]);
  EXPECT_EQ(0u, dst_pixels_c[4]);
  EXPECT_EQ(133u, dst_pixels_c[63]);

  // Test Odd width version - Last pixel is just 1 horizontal pixel.
  ScaleRowDown2Box_Odd_C(orig_pixels, kRowWidth, dst_pixels_c, kHalfWidth);

  EXPECT_EQ(64u, dst_pixels_c[0]);
  EXPECT_EQ(25u, dst_pixels_c[1]);
  EXPECT_EQ(13u, dst_pixels_c[2]);
  EXPECT_EQ(5u, dst_pixels_c[3]);
  EXPECT_EQ(0u, dst_pixels_c[4]);
  EXPECT_EQ(10u, dst_pixels_c[63]);

  // Test one pixel less, should skip the last pixel.
  memset(dst_pixels_c, 0, sizeof(dst_pixels_c));
  ScaleRowDown2Box_Odd_C(orig_pixels, kRowWidth, dst_pixels_c, kHalfWidth - 1);

  EXPECT_EQ(64u, dst_pixels_c[0]);
  EXPECT_EQ(25u, dst_pixels_c[1]);
  EXPECT_EQ(13u, dst_pixels_c[2]);
  EXPECT_EQ(5u, dst_pixels_c[3]);
  EXPECT_EQ(0u, dst_pixels_c[4]);
  EXPECT_EQ(0u, dst_pixels_c[63]);

  // Test regular half size SSSE3.
  ScaleRowDown2Box_SSSE3(orig_pixels, kRowWidth, dst_pixels_opt, kHalfWidth);

  EXPECT_EQ(64u, dst_pixels_opt[0]);
  EXPECT_EQ(25u, dst_pixels_opt[1]);
  EXPECT_EQ(13u, dst_pixels_opt[2]);
  EXPECT_EQ(5u, dst_pixels_opt[3]);
  EXPECT_EQ(0u, dst_pixels_opt[4]);
  EXPECT_EQ(133u, dst_pixels_opt[63]);

  // Compare C and SSSE3 match.
  ScaleRowDown2Box_Odd_C(orig_pixels, kRowWidth, dst_pixels_c, kHalfWidth);
  ScaleRowDown2Box_Odd_SSSE3(orig_pixels, kRowWidth, dst_pixels_opt,
                             kHalfWidth);
  for (int i = 0; i < kHalfWidth; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }
}
#endif  // HAS_SCALEROWDOWN2_SSSE3
714
// C-linkage declarations of the 16 bit 2x upscale row functions called by the
// ScaleRowUp2_16 test: the NEON and MMI variants and the portable C version.
// All three share one signature.
extern "C" {
void ScaleRowUp2_16_NEON(const uint16_t* src_ptr,
                         ptrdiff_t src_stride,
                         uint16_t* dst,
                         int dst_width);
void ScaleRowUp2_16_MMI(const uint16_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint16_t* dst,
                        int dst_width);
void ScaleRowUp2_16_C(const uint16_t* src_ptr,
                      ptrdiff_t src_stride,
                      uint16_t* dst,
                      int dst_width);
}  // extern "C"
727
// Upscales two 640 pixel rows of sequential 16 bit values to one 1280 pixel
// row with ScaleRowUp2_16_C, then repeats the scale benchmark_pixels_div1280_
// times into a second buffer using the NEON variant on aarch64, the MMI
// variant on Loongson 3A (each only when the CPU flag is detected), or the C
// version otherwise. The two outputs must match on every pixel, and the first
// and last reference pixels are checked against known values.
TEST_F(LibYUVScaleTest, TestScaleRowUp2_16) {
  const int kSrcWidth = 640;
  const int kDstWidth = 1280;
  const int kSrcCount = kSrcWidth * 2 + 1;

  SIMD_ALIGNED(uint16_t orig_pixels[kSrcCount]);  // 2 rows + 1 pixel overrun.
  SIMD_ALIGNED(uint16_t dst_pixels_opt[kDstWidth]);
  SIMD_ALIGNED(uint16_t dst_pixels_c[kDstWidth]);

  // Distinct fill bytes so the two outputs cannot match by accident.
  memset(orig_pixels, 0, sizeof(orig_pixels));
  memset(dst_pixels_opt, 1, sizeof(dst_pixels_opt));
  memset(dst_pixels_c, 2, sizeof(dst_pixels_c));

  for (int idx = 0; idx < kSrcCount; ++idx) {
    orig_pixels[idx] = idx;
  }

  // Reference result.
  ScaleRowUp2_16_C(&orig_pixels[0], kSrcWidth, &dst_pixels_c[0], kDstWidth);

  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
    if (TestCpuFlag(kCpuHasNEON)) {
      ScaleRowUp2_16_NEON(&orig_pixels[0], kSrcWidth, &dst_pixels_opt[0],
                          kDstWidth);
    } else {
      ScaleRowUp2_16_C(&orig_pixels[0], kSrcWidth, &dst_pixels_opt[0],
                       kDstWidth);
    }
#elif !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
    if (TestCpuFlag(kCpuHasMMI)) {
      ScaleRowUp2_16_MMI(&orig_pixels[0], kSrcWidth, &dst_pixels_opt[0],
                         kDstWidth);
    } else {
      ScaleRowUp2_16_C(&orig_pixels[0], kSrcWidth, &dst_pixels_opt[0],
                       kDstWidth);
    }
#else
    ScaleRowUp2_16_C(&orig_pixels[0], kSrcWidth, &dst_pixels_opt[0],
                     kDstWidth);
#endif
  }

  for (int i = 0; i < kDstWidth; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }
  // First pixel: the 9:3:3:1 weighting of the four neighbouring source values
  // spelled out in the expectation, plus 8 for rounding, divided by 16.
  EXPECT_EQ(dst_pixels_c[0], (0 * 9 + 1 * 3 + 640 * 3 + 641 * 1 + 8) / 16);
  EXPECT_EQ(dst_pixels_c[1279], 800);
}
767
// C-linkage declaration of the NEON 16 bit 2x box downscale row function
// called by the ScaleRowDown2Box_16 test.
extern "C" {
void ScaleRowDown2Box_16_NEON(const uint16_t* src_ptr,
                              ptrdiff_t src_stride,
                              uint16_t* dst,
                              int dst_width);
}  // extern "C"
772
// Downscales two 2560 pixel rows of sequential 16 bit values to one 1280 pixel
// row with ScaleRowDown2Box_16_C, then repeats the scale
// benchmark_pixels_div1280_ times into a second buffer using the NEON variant
// on aarch64 (when the CPU flag is detected) or the C version otherwise. The
// two outputs must match on every pixel, and the first and last reference
// pixels are checked against known values.
TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) {
  const int kSrcWidth = 2560;
  const int kDstWidth = 1280;
  const int kSrcCount = kSrcWidth * 2;

  SIMD_ALIGNED(uint16_t orig_pixels[kSrcCount]);
  SIMD_ALIGNED(uint16_t dst_pixels_c[kDstWidth]);
  SIMD_ALIGNED(uint16_t dst_pixels_opt[kDstWidth]);

  // Distinct fill bytes so the two outputs cannot match by accident.
  memset(orig_pixels, 0, sizeof(orig_pixels));
  memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
  memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));

  for (int idx = 0; idx < kSrcCount; ++idx) {
    orig_pixels[idx] = idx;
  }

  // Reference result.
  ScaleRowDown2Box_16_C(&orig_pixels[0], kSrcWidth, &dst_pixels_c[0],
                        kDstWidth);

  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
    if (TestCpuFlag(kCpuHasNEON)) {
      ScaleRowDown2Box_16_NEON(&orig_pixels[0], kSrcWidth, &dst_pixels_opt[0],
                               kDstWidth);
    } else {
      ScaleRowDown2Box_16_C(&orig_pixels[0], kSrcWidth, &dst_pixels_opt[0],
                            kDstWidth);
    }
#else
    ScaleRowDown2Box_16_C(&orig_pixels[0], kSrcWidth, &dst_pixels_opt[0],
                          kDstWidth);
#endif
  }

  for (int i = 0; i < kDstWidth; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  // First pixel: rounded average of the 2x2 block at the start of both rows.
  EXPECT_EQ(dst_pixels_c[0], (0 + 1 + 2560 + 2561 + 2) / 4);
  EXPECT_EQ(dst_pixels_c[1279], 3839);
}
806
807 // Test scaling plane with 8 bit C vs 16 bit C and return maximum pixel
808 // difference.
809 // 0 = exact.
TestPlaneFilter_16(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)810 static int TestPlaneFilter_16(int src_width,
811 int src_height,
812 int dst_width,
813 int dst_height,
814 FilterMode f,
815 int benchmark_iterations,
816 int disable_cpu_flags,
817 int benchmark_cpu_info) {
818 if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
819 return 0;
820 }
821
822 int i;
823 int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
824 int src_stride_y = Abs(src_width);
825 int dst_y_plane_size = dst_width * dst_height;
826 int dst_stride_y = dst_width;
827
828 align_buffer_page_end(src_y, src_y_plane_size);
829 align_buffer_page_end(src_y_16, src_y_plane_size * 2);
830 align_buffer_page_end(dst_y_8, dst_y_plane_size);
831 align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
832 uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
833 uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
834
835 MemRandomize(src_y, src_y_plane_size);
836 memset(dst_y_8, 0, dst_y_plane_size);
837 memset(dst_y_16, 1, dst_y_plane_size * 2);
838
839 for (i = 0; i < src_y_plane_size; ++i) {
840 p_src_y_16[i] = src_y[i] & 255;
841 }
842
843 MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
844 ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y_8, dst_stride_y,
845 dst_width, dst_height, f);
846 MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
847
848 for (i = 0; i < benchmark_iterations; ++i) {
849 ScalePlane_16(p_src_y_16, src_stride_y, src_width, src_height, p_dst_y_16,
850 dst_stride_y, dst_width, dst_height, f);
851 }
852
853 // Expect an exact match.
854 int max_diff = 0;
855 for (i = 0; i < dst_y_plane_size; ++i) {
856 int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
857 if (abs_diff > max_diff) {
858 max_diff = abs_diff;
859 }
860 }
861
862 free_aligned_buffer_page_end(dst_y_8);
863 free_aligned_buffer_page_end(dst_y_16);
864 free_aligned_buffer_page_end(src_y);
865 free_aligned_buffer_page_end(src_y_16);
866
867 return max_diff;
868 }
869
// The following adjustments in dimensions ensure the scale factor will be
// exactly achieved.
// 2 is chroma subsample.
// With k = (x / nom + 1) / 2, DX yields the destination size 2 * k * nom and
// SX yields the source size 2 * k * denom, so for non-negative x the ratio
// SX : DX is exactly denom : nom. DX takes Abs(x); SX uses x as given. DX
// accepts denom only to keep the two signatures parallel.
// Every parameter is parenthesized so that expression arguments (for example
// "1 + 1") expand with the intended precedence.
#define DX(x, nom, denom) \
  static_cast<int>(((Abs(x) / (nom) + 1) / 2) * (nom) * 2)
#define SX(x, nom, denom) \
  static_cast<int>((((x) / (nom) + 1) / 2) * (denom) * 2)
875
// Defines one single-plane scale-down test for a factor/filter pair. Source
// dimensions come from SX() and destination dimensions from DX(), both applied
// to the fixture's benchmark width and height. The test passes when the 8 bit
// vs 16 bit maximum pixel difference is <= tolerance.
#define TEST_FACTOR1(label, mode, num, den, tolerance) \
  TEST_F(LibYUVScaleTest, ScalePlaneDownBy##label##_##mode##_16) { \
    const int diff = TestPlaneFilter_16( \
        SX(benchmark_width_, num, den), SX(benchmark_height_, num, den), \
        DX(benchmark_width_, num, den), DX(benchmark_height_, num, den), \
        kFilter##mode, benchmark_iterations_, disable_cpu_flags_, \
        benchmark_cpu_info_); \
    EXPECT_LE(diff, tolerance); \
  }
885
886 // Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
887 // filtering is different fixed point implementations for SSSE3, Neon and C.
888 #define TEST_FACTOR(name, nom, denom, boxdiff) \
889 TEST_FACTOR1(name, None, nom, denom, 0) \
890 TEST_FACTOR1(name, Linear, nom, denom, boxdiff) \
891 TEST_FACTOR1(name, Bilinear, nom, denom, boxdiff) \
892 TEST_FACTOR1(name, Box, nom, denom, boxdiff)
893
894 TEST_FACTOR(2, 1, 2, 0)
895 TEST_FACTOR(4, 1, 4, 0)
896 TEST_FACTOR(8, 1, 8, 0)
897 TEST_FACTOR(3by4, 3, 4, 1)
898 TEST_FACTOR(3by8, 3, 8, 1)
899 TEST_FACTOR(3, 1, 3, 0)
900 #undef TEST_FACTOR1
901 #undef TEST_FACTOR
902 #undef SX
903 #undef DX
904 } // namespace libyuv
905