1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <math.h>
12 #include <stdlib.h>
13 #include <time.h>
14
15 // row.h defines SIMD_ALIGNED, overriding unit_test.h
16 #include "libyuv/row.h" /* For ScaleSumSamples_Neon */
17
18 #include "../unit_test/unit_test.h"
19 #include "libyuv/compare.h"
20 #include "libyuv/convert.h"
21 #include "libyuv/convert_argb.h"
22 #include "libyuv/convert_from.h"
23 #include "libyuv/convert_from_argb.h"
24 #include "libyuv/cpu_id.h"
25 #include "libyuv/planar_functions.h"
26 #include "libyuv/rotate.h"
27
28 namespace libyuv {
29
// Checks ARGBUnattenuate against hand-picked pixels, then pushes a 1280 pixel
// ramp through attenuate -> unattenuate -> attenuate and requires the second
// attenuation to stay within 2 of the first on every byte.
TEST_F(LibYUVPlanarTest, TestAttenuate) {
  const int kPixels = 1280;
  const int kSize = kPixels * 4;
  align_buffer_page_end(orig_pixels, kSize);
  align_buffer_page_end(atten_pixels, kSize);
  align_buffer_page_end(unatten_pixels, kSize);
  align_buffer_page_end(atten2_pixels, kSize);

  // Inputs for ARGBUnattenuate, 4 bytes per pixel with alpha in byte 3.
  static const uint8_t kUnattenIn[4][4] = {
      {200u, 129u, 127u, 128u},  // Unattenuation has to clamp.
      {16u, 64u, 192u, 0u},      // Transparent.
      {16u, 64u, 192u, 255u},    // Opaque.
      {16u, 64u, 192u, 128u},    // Alpha of 128.
  };
  // Bytes expected back from ARGBUnattenuate for the pixels above.
  static const unsigned int kUnattenOut[4][4] = {
      {255u, 255u, 254u, 128u},
      {0u, 0u, 0u, 0u},
      {16u, 64u, 192u, 255u},
      {32u, 128u, 255u, 128u},
  };
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[p * 4 + c] = kUnattenIn[p][c];
    }
  }
  ARGBUnattenuate(orig_pixels, 0, unatten_pixels, 0, 4, 1);
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kUnattenOut[p][c], unatten_pixels[p * 4 + c])
          << "pixel " << p << " channel " << c;
    }
  }

  // Ramp: the stored bytes are the low 8 bits of i, i / 2, i / 3 and i.
  for (int i = 0; i < kPixels; ++i) {
    const int base = i * 4;
    orig_pixels[base + 0] = i;
    orig_pixels[base + 1] = i / 2;
    orig_pixels[base + 2] = i / 3;
    orig_pixels[base + 3] = i;
  }
  ARGBAttenuate(orig_pixels, 0, atten_pixels, 0, kPixels, 1);
  ARGBUnattenuate(atten_pixels, 0, unatten_pixels, 0, kPixels, 1);
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    ARGBAttenuate(unatten_pixels, 0, atten2_pixels, 0, kPixels, 1);
  }
  // Round trip: re-attenuating the unattenuated image must land near the
  // first attenuation.
  for (int i = 0; i < kSize; ++i) {
    EXPECT_NEAR(atten_pixels[i], atten2_pixels[i], 2) << "byte " << i;
  }

  // Make sure transparent, 50% and opaque are fully accurate.
  static const int kHalfAlpha[4] = {64, 32, 21, 128};
  static const int kOpaqueColor[3] = {255, 127, 85};
  for (int c = 0; c < 4; ++c) {
    EXPECT_EQ(0, atten_pixels[0 * 4 + c]) << "channel " << c;
  }
  for (int c = 0; c < 4; ++c) {
    EXPECT_EQ(kHalfAlpha[c], atten_pixels[128 * 4 + c]) << "channel " << c;
  }
  for (int c = 0; c < 3; ++c) {
    EXPECT_NEAR(kOpaqueColor[c], atten_pixels[255 * 4 + c], 1)
        << "channel " << c;
  }
  EXPECT_EQ(255, atten_pixels[255 * 4 + 3]);

  free_aligned_buffer_page_end(atten2_pixels);
  free_aligned_buffer_page_end(unatten_pixels);
  free_aligned_buffer_page_end(atten_pixels);
  free_aligned_buffer_page_end(orig_pixels);
}
109
TestAttenuateI(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)110 static int TestAttenuateI(int width,
111 int height,
112 int benchmark_iterations,
113 int disable_cpu_flags,
114 int benchmark_cpu_info,
115 int invert,
116 int off) {
117 if (width < 1) {
118 width = 1;
119 }
120 const int kBpp = 4;
121 const int kStride = width * kBpp;
122 align_buffer_page_end(src_argb, kStride * height + off);
123 align_buffer_page_end(dst_argb_c, kStride * height);
124 align_buffer_page_end(dst_argb_opt, kStride * height);
125 for (int i = 0; i < kStride * height; ++i) {
126 src_argb[i + off] = (fastrand() & 0xff);
127 }
128 memset(dst_argb_c, 0, kStride * height);
129 memset(dst_argb_opt, 0, kStride * height);
130
131 MaskCpuFlags(disable_cpu_flags);
132 ARGBAttenuate(src_argb + off, kStride, dst_argb_c, kStride, width,
133 invert * height);
134 MaskCpuFlags(benchmark_cpu_info);
135 for (int i = 0; i < benchmark_iterations; ++i) {
136 ARGBAttenuate(src_argb + off, kStride, dst_argb_opt, kStride, width,
137 invert * height);
138 }
139 int max_diff = 0;
140 for (int i = 0; i < kStride * height; ++i) {
141 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
142 static_cast<int>(dst_argb_opt[i]));
143 if (abs_diff > max_diff) {
144 max_diff = abs_diff;
145 }
146 }
147 free_aligned_buffer_page_end(src_argb);
148 free_aligned_buffer_page_end(dst_argb_c);
149 free_aligned_buffer_page_end(dst_argb_opt);
150 return max_diff;
151 }
152
// ARGBAttenuate with a width one less than the benchmark width; the two runs
// made by TestAttenuateI may differ by at most 2 per byte.
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Any) {
  const int kWidth = benchmark_width_ - 1;
  const int kInvert = +1;
  const int kOffset = 0;
  EXPECT_LE(TestAttenuateI(kWidth, benchmark_height_, benchmark_iterations_,
                           disable_cpu_flags_, benchmark_cpu_info_, kInvert,
                           kOffset),
            2);
}
159
// ARGBAttenuate with the source image starting 1 byte into its buffer; the
// two runs made by TestAttenuateI may differ by at most 2 per byte.
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  EXPECT_LE(TestAttenuateI(benchmark_width_, benchmark_height_,
                           benchmark_iterations_, disable_cpu_flags_,
                           benchmark_cpu_info_, kInvert, kOffset),
            2);
}
166
// ARGBAttenuate called with a negated height (invert = -1); the two runs made
// by TestAttenuateI may differ by at most 2 per byte.
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  EXPECT_LE(TestAttenuateI(benchmark_width_, benchmark_height_,
                           benchmark_iterations_, disable_cpu_flags_,
                           benchmark_cpu_info_, kInvert, kOffset),
            2);
}
173
// ARGBAttenuate at the full benchmark size with no offset and a positive
// height; the two runs made by TestAttenuateI may differ by at most 2 per
// byte.
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  EXPECT_LE(TestAttenuateI(benchmark_width_, benchmark_height_,
                           benchmark_iterations_, disable_cpu_flags_,
                           benchmark_cpu_info_, kInvert, kOffset),
            2);
}
180
TestUnattenuateI(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)181 static int TestUnattenuateI(int width,
182 int height,
183 int benchmark_iterations,
184 int disable_cpu_flags,
185 int benchmark_cpu_info,
186 int invert,
187 int off) {
188 if (width < 1) {
189 width = 1;
190 }
191 const int kBpp = 4;
192 const int kStride = width * kBpp;
193 align_buffer_page_end(src_argb, kStride * height + off);
194 align_buffer_page_end(dst_argb_c, kStride * height);
195 align_buffer_page_end(dst_argb_opt, kStride * height);
196 for (int i = 0; i < kStride * height; ++i) {
197 src_argb[i + off] = (fastrand() & 0xff);
198 }
199 ARGBAttenuate(src_argb + off, kStride, src_argb + off, kStride, width,
200 height);
201 memset(dst_argb_c, 0, kStride * height);
202 memset(dst_argb_opt, 0, kStride * height);
203
204 MaskCpuFlags(disable_cpu_flags);
205 ARGBUnattenuate(src_argb + off, kStride, dst_argb_c, kStride, width,
206 invert * height);
207 MaskCpuFlags(benchmark_cpu_info);
208 for (int i = 0; i < benchmark_iterations; ++i) {
209 ARGBUnattenuate(src_argb + off, kStride, dst_argb_opt, kStride, width,
210 invert * height);
211 }
212 int max_diff = 0;
213 for (int i = 0; i < kStride * height; ++i) {
214 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
215 static_cast<int>(dst_argb_opt[i]));
216 if (abs_diff > max_diff) {
217 max_diff = abs_diff;
218 }
219 }
220 free_aligned_buffer_page_end(src_argb);
221 free_aligned_buffer_page_end(dst_argb_c);
222 free_aligned_buffer_page_end(dst_argb_opt);
223 return max_diff;
224 }
225
// ARGBUnattenuate with a width one less than the benchmark width; the two
// runs made by TestUnattenuateI may differ by at most 2 per byte.
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Any) {
  const int kWidth = benchmark_width_ - 1;
  const int kInvert = +1;
  const int kOffset = 0;
  EXPECT_LE(TestUnattenuateI(kWidth, benchmark_height_, benchmark_iterations_,
                             disable_cpu_flags_, benchmark_cpu_info_, kInvert,
                             kOffset),
            2);
}
232
// ARGBUnattenuate with the source image starting 1 byte into its buffer; the
// two runs made by TestUnattenuateI may differ by at most 2 per byte.
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  EXPECT_LE(TestUnattenuateI(benchmark_width_, benchmark_height_,
                             benchmark_iterations_, disable_cpu_flags_,
                             benchmark_cpu_info_, kInvert, kOffset),
            2);
}
239
// ARGBUnattenuate called with a negated height (invert = -1); the two runs
// made by TestUnattenuateI may differ by at most 2 per byte.
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  EXPECT_LE(TestUnattenuateI(benchmark_width_, benchmark_height_,
                             benchmark_iterations_, disable_cpu_flags_,
                             benchmark_cpu_info_, kInvert, kOffset),
            2);
}
246
// ARGBUnattenuate at the full benchmark size with no offset and a positive
// height; the two runs made by TestUnattenuateI may differ by at most 2 per
// byte.
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  EXPECT_LE(TestUnattenuateI(benchmark_width_, benchmark_height_,
                             benchmark_iterations_, disable_cpu_flags_,
                             benchmark_cpu_info_, kInvert, kOffset),
            2);
}
253
// Fills a 16x16 image with one constant pixel and checks that every output
// entry of ARGBComputeCumulativeSum equals the channel value multiplied by
// (x + 1) * (y + 1).
TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) {
  const int kDim = 16;
  SIMD_ALIGNED(uint8_t orig_pixels[16][16][4]);
  SIMD_ALIGNED(int32_t added_pixels[16][16][4]);

  // The same 4 byte pixel is written everywhere.
  static const uint8_t kPixel[4] = {1u, 2u, 3u, 255u};
  for (int row = 0; row < kDim; ++row) {
    for (int col = 0; col < kDim; ++col) {
      for (int c = 0; c < 4; ++c) {
        orig_pixels[row][col][c] = kPixel[c];
      }
    }
  }

  ARGBComputeCumulativeSum(&orig_pixels[0][0][0], kDim * 4,
                           &added_pixels[0][0][0], kDim * 4, kDim, kDim);

  for (int row = 0; row < kDim; ++row) {
    for (int col = 0; col < kDim; ++col) {
      // Number of pixels in the rectangle from (0, 0) to (col, row) inclusive.
      const int area = (col + 1) * (row + 1);
      for (int c = 0; c < 4; ++c) {
        EXPECT_EQ(area * kPixel[c], added_pixels[row][col][c])
            << "row " << row << " col " << col << " channel " << c;
      }
    }
  }
}
279
// Converts six known pixels to gray in place with ARGBGray and checks every
// output byte, then times the call on a 1280 pixel ramp.
TEST_F(LibYUVPlanarTest, TestARGBGray) {
  const int kPixels = 1280;
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  static const uint8_t kInput[6][4] = {
      {255u, 0u, 0u, 128u},      // Blue.
      {0u, 255u, 0u, 0u},        // Green.
      {0u, 0u, 255u, 255u},      // Red.
      {0u, 0u, 0u, 255u},        // Black.
      {255u, 255u, 255u, 255u},  // White.
      {16u, 64u, 192u, 224u},    // Color.
  };
  // Expected result: one gray level in bytes 0..2, byte 3 as in the input.
  static const unsigned int kExpected[6][4] = {
      {30u, 30u, 30u, 128u},     {149u, 149u, 149u, 0u},
      {76u, 76u, 76u, 255u},     {0u, 0u, 0u, 255u},
      {255u, 255u, 255u, 255u},  {96u, 96u, 96u, 224u},
  };
  for (int p = 0; p < 6; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[p][c] = kInput[p][c];
    }
  }
  // Do 16 to test asm version.
  ARGBGray(&orig_pixels[0][0], 0, 0, 0, 16, 1);
  for (int p = 0; p < 6; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpected[p][c], orig_pixels[p][c])
          << "pixel " << p << " channel " << c;
    }
  }

  // Ramp for the timing loop; there are no expectations on its output.
  for (int i = 0; i < kPixels; ++i) {
    uint8_t* px = orig_pixels[i];
    px[0] = i;
    px[1] = i / 2;
    px[2] = i / 3;
    px[3] = i;
  }
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    ARGBGray(&orig_pixels[0][0], 0, 0, 0, kPixels, 1);
  }
}
350
// Converts six known pixels to gray with ARGBGrayTo (separate destination
// buffer) and checks every output byte, then times the call on a 1280 pixel
// ramp.
TEST_F(LibYUVPlanarTest, TestARGBGrayTo) {
  const int kPixels = 1280;
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  SIMD_ALIGNED(uint8_t gray_pixels[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  static const uint8_t kInput[6][4] = {
      {255u, 0u, 0u, 128u},      // Blue.
      {0u, 255u, 0u, 0u},        // Green.
      {0u, 0u, 255u, 255u},      // Red.
      {0u, 0u, 0u, 255u},        // Black.
      {255u, 255u, 255u, 255u},  // White.
      {16u, 64u, 192u, 224u},    // Color.
  };
  // Expected result: one gray level in bytes 0..2, byte 3 as in the input.
  static const unsigned int kExpected[6][4] = {
      {30u, 30u, 30u, 128u},     {149u, 149u, 149u, 0u},
      {76u, 76u, 76u, 255u},     {0u, 0u, 0u, 255u},
      {255u, 255u, 255u, 255u},  {96u, 96u, 96u, 224u},
  };
  for (int p = 0; p < 6; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[p][c] = kInput[p][c];
    }
  }
  // Do 16 to test asm version.
  ARGBGrayTo(&orig_pixels[0][0], 0, &gray_pixels[0][0], 0, 16, 1);
  for (int p = 0; p < 6; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpected[p][c], gray_pixels[p][c])
          << "pixel " << p << " channel " << c;
    }
  }

  // Ramp for the timing loop; there are no expectations on its output.
  for (int i = 0; i < kPixels; ++i) {
    uint8_t* px = orig_pixels[i];
    px[0] = i;
    px[1] = i / 2;
    px[2] = i / 3;
    px[3] = i;
  }
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    ARGBGrayTo(&orig_pixels[0][0], 0, &gray_pixels[0][0], 0, kPixels, 1);
  }
}
422
// Applies ARGBSepia in place to six known pixels and checks every output
// byte, then times the call on a 1280 pixel ramp.
TEST_F(LibYUVPlanarTest, TestARGBSepia) {
  const int kPixels = 1280;
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  static const uint8_t kInput[6][4] = {
      {255u, 0u, 0u, 128u},      // Blue.
      {0u, 255u, 0u, 0u},        // Green.
      {0u, 0u, 255u, 255u},      // Red.
      {0u, 0u, 0u, 255u},        // Black.
      {255u, 255u, 255u, 255u},  // White.
      {16u, 64u, 192u, 224u},    // Color.
  };
  // Expected sepia result; byte 3 is the same as in the input.
  static const unsigned int kExpected[6][4] = {
      {33u, 43u, 47u, 128u},     {135u, 175u, 195u, 0u},
      {69u, 89u, 99u, 255u},     {0u, 0u, 0u, 255u},
      {239u, 255u, 255u, 255u},  {88u, 114u, 127u, 224u},
  };
  for (int p = 0; p < 6; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[p][c] = kInput[p][c];
    }
  }
  // Do 16 to test asm version.
  ARGBSepia(&orig_pixels[0][0], 0, 0, 0, 16, 1);
  for (int p = 0; p < 6; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpected[p][c], orig_pixels[p][c])
          << "pixel " << p << " channel " << c;
    }
  }

  // Ramp for the timing loop; there are no expectations on its output.
  for (int i = 0; i < kPixels; ++i) {
    uint8_t* px = orig_pixels[i];
    px[0] = i;
    px[1] = i / 2;
    px[2] = i / 3;
    px[3] = i;
  }
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    ARGBSepia(&orig_pixels[0][0], 0, 0, 0, kPixels, 1);
  }
}
494
// Applies a sepia matrix with ARGBColorMatrix to four known pixels and checks
// every output byte. It then runs a 1280 pixel ramp once under
// disable_cpu_flags_ and repeatedly under benchmark_cpu_info_, and requires
// the two outputs to match exactly.
TEST_F(LibYUVPlanarTest, TestARGBColorMatrix) {
  const int kPixels = 1280;
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]);
  SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]);

  // Matrix for Sepia.
  SIMD_ALIGNED(static const int8_t kRGBToSepia[]) = {
      17 / 2, 68 / 2, 35 / 2, 0, 22 / 2, 88 / 2, 45 / 2, 0,
      24 / 2, 98 / 2, 50 / 2, 0, 0,      0,      0,      64,  // Copy alpha.
  };
  memset(orig_pixels, 0, sizeof(orig_pixels));

  static const uint8_t kInput[4][4] = {
      {255u, 0u, 0u, 128u},    // Blue.
      {0u, 255u, 0u, 0u},      // Green.
      {0u, 0u, 255u, 255u},    // Red.
      {16u, 64u, 192u, 224u},  // Color.
  };
  static const unsigned int kExpected[4][4] = {
      {31u, 43u, 47u, 128u},
      {135u, 175u, 195u, 0u},
      {67u, 87u, 99u, 255u},
      {87u, 112u, 127u, 224u},
  };
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[p][c] = kInput[p][c];
    }
  }
  // Do 16 to test asm version.
  ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
                  &kRGBToSepia[0], 16, 1);
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpected[p][c], dst_pixels_opt[p][c])
          << "pixel " << p << " channel " << c;
    }
  }

  // Ramp input shared by the two runs compared below.
  for (int i = 0; i < kPixels; ++i) {
    uint8_t* px = orig_pixels[i];
    px[0] = i;
    px[1] = i / 2;
    px[2] = i / 3;
    px[3] = i;
  }
  MaskCpuFlags(disable_cpu_flags_);
  ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0,
                  &kRGBToSepia[0], kPixels, 1);
  MaskCpuFlags(benchmark_cpu_info_);

  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
                    &kRGBToSepia[0], kPixels, 1);
  }

  // The two runs must agree on every byte.
  for (int i = 0; i < kPixels; ++i) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(dst_pixels_c[i][c], dst_pixels_opt[i][c])
          << "pixel " << i << " channel " << c;
    }
  }
}
570
// Applies a sepia matrix in place with RGBColorMatrix to four known pixels
// and checks every output byte (byte 3 keeps its input value), then times the
// call on a 1280 pixel ramp.
TEST_F(LibYUVPlanarTest, TestRGBColorMatrix) {
  const int kPixels = 1280;
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);

  // Matrix for Sepia.
  SIMD_ALIGNED(static const int8_t kRGBToSepia[]) = {
      17, 68, 35, 0, 22, 88, 45, 0,
      24, 98, 50, 0, 0,  0,  0,  0,  // Unused but makes matrix 16 bytes.
  };
  memset(orig_pixels, 0, sizeof(orig_pixels));

  static const uint8_t kInput[4][4] = {
      {255u, 0u, 0u, 128u},    // Blue.
      {0u, 255u, 0u, 0u},      // Green.
      {0u, 0u, 255u, 255u},    // Red.
      {16u, 64u, 192u, 224u},  // Color.
  };
  static const unsigned int kExpected[4][4] = {
      {31u, 43u, 47u, 128u},
      {135u, 175u, 195u, 0u},
      {67u, 87u, 99u, 255u},
      {87u, 112u, 127u, 224u},
  };
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[p][c] = kInput[p][c];
    }
  }
  // Do 16 to test asm version.
  RGBColorMatrix(&orig_pixels[0][0], 0, &kRGBToSepia[0], 0, 0, 16, 1);
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpected[p][c], orig_pixels[p][c])
          << "pixel " << p << " channel " << c;
    }
  }

  // Ramp for the timing loop; there are no expectations on its output.
  for (int i = 0; i < kPixels; ++i) {
    uint8_t* px = orig_pixels[i];
    px[0] = i;
    px[1] = i / 2;
    px[2] = i / 3;
    px[3] = i;
  }
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    RGBColorMatrix(&orig_pixels[0][0], 0, &kRGBToSepia[0], 0, 0, kPixels, 1);
  }
}
630
// Remaps four known pixels in place through a lookup table with
// ARGBColorTable and checks every output byte, then times the call on a 1280
// pixel ramp.
TEST_F(LibYUVPlanarTest, TestARGBColorTable) {
  const int kPixels = 1280;
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  // Lookup table of 256 * 4 bytes. Only the first 16 entries are given; the
  // remainder is zero-initialized.
  static const uint8_t kARGBTable[256 * 4] = {
      1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u, 16u,
  };

  static const uint8_t kInput[4][4] = {
      {0u, 0u, 0u, 0u},
      {1u, 1u, 1u, 1u},
      {2u, 2u, 2u, 2u},
      {0u, 1u, 2u, 3u},
  };
  // Each expected byte equals kARGBTable[input * 4 + channel].
  static const unsigned int kExpected[4][4] = {
      {1u, 2u, 3u, 4u},
      {5u, 6u, 7u, 8u},
      {9u, 10u, 11u, 12u},
      {1u, 6u, 11u, 16u},
  };
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[p][c] = kInput[p][c];
    }
  }
  // Do 16 to test asm version.
  ARGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 16, 1);
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpected[p][c], orig_pixels[p][c])
          << "pixel " << p << " channel " << c;
    }
  }

  // Ramp for the timing loop; there are no expectations on its output.
  for (int i = 0; i < kPixels; ++i) {
    uint8_t* px = orig_pixels[i];
    px[0] = i;
    px[1] = i / 2;
    px[2] = i / 3;
    px[3] = i;
  }
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    ARGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, kPixels, 1);
  }
}
685
686 // Same as TestARGBColorTable except alpha does not change.
TEST_F(LibYUVPlanarTest, TestRGBColorTable) {
  // Remaps four known pixels in place through a lookup table with
  // RGBColorTable and checks every output byte; byte 3 of each pixel must be
  // left as it was. Then times the call on a 1280 pixel ramp.
  const int kPixels = 1280;
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  // Lookup table of 256 * 4 bytes. Only the first 16 entries are given; the
  // remainder is zero-initialized.
  static const uint8_t kARGBTable[256 * 4] = {
      1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u, 16u,
  };

  static const uint8_t kInput[4][4] = {
      {0u, 0u, 0u, 0u},
      {1u, 1u, 1u, 1u},
      {2u, 2u, 2u, 2u},
      {0u, 1u, 2u, 3u},
  };
  // Bytes 0..2 equal kARGBTable[input * 4 + channel]; byte 3 is the input
  // alpha, unchanged.
  static const unsigned int kExpected[4][4] = {
      {1u, 2u, 3u, 0u},
      {5u, 6u, 7u, 1u},
      {9u, 10u, 11u, 2u},
      {1u, 6u, 11u, 3u},
  };
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[p][c] = kInput[p][c];
    }
  }
  // Do 16 to test asm version.
  RGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 16, 1);
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpected[p][c], orig_pixels[p][c])
          << "pixel " << p << " channel " << c;
    }
  }

  // Ramp for the timing loop; there are no expectations on its output.
  for (int i = 0; i < kPixels; ++i) {
    uint8_t* px = orig_pixels[i];
    px[0] = i;
    px[1] = i / 2;
    px[2] = i / 3;
    px[3] = i;
  }
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    RGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, kPixels, 1);
  }
}
741
// Quantizes a 1280 pixel ramp in place with ARGBQuantize using steps of 8 and
// checks that bytes 0..2 become "multiple of 8, plus 4" while byte 3 is left
// alone. The timing loop afterwards re-quantizes the already quantized data.
TEST_F(LibYUVPlanarTest, TestARGBQuantize) {
  const int kPixels = 1280;
  // NOTE(review): constant names mirror the presumed ARGBQuantize parameter
  // order (scale, interval size, interval offset) - confirm against
  // planar_functions.h.
  const int kInterval = 8;
  const int kOffset = kInterval / 2;
  const int kScale = (65536 + (kInterval / 2)) / kInterval;
  // Channel c of pixel i starts out as the low 8 bits of i / kDivisor[c].
  static const int kDivisor[3] = {1, 2, 3};
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);

  for (int i = 0; i < kPixels; ++i) {
    uint8_t* px = orig_pixels[i];
    px[0] = i;
    px[1] = i / 2;
    px[2] = i / 3;
    px[3] = i;
  }
  ARGBQuantize(&orig_pixels[0][0], 0, kScale, kInterval, kOffset, 0, 0,
               kPixels, 1);

  for (int i = 0; i < kPixels; ++i) {
    for (int c = 0; c < 3; ++c) {
      const int level = i / kDivisor[c];
      EXPECT_EQ((level / kInterval * kInterval + kOffset) & 255,
                orig_pixels[i][c])
          << "pixel " << i << " channel " << c;
    }
    EXPECT_EQ(i & 255, orig_pixels[i][3]) << "pixel " << i;
  }
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    ARGBQuantize(&orig_pixels[0][0], 0, kScale, kInterval, kOffset, 0, 0,
                 kPixels, 1);
  }
}
765
// Mirrors one 1280 pixel row with ARGBMirror and checks that source pixel i
// ends up at destination index 1280 - 1 - i with its four bytes in the same
// order.
TEST_F(LibYUVPlanarTest, TestARGBMirror) {
  const int kPixels = 1280;
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  SIMD_ALIGNED(uint8_t dst_pixels[1280][4]);

  // Channel c of pixel i holds the low 8 bits of i / (c + 1).
  for (int i = 0; i < kPixels; ++i) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[i][c] = i / (c + 1);
    }
  }
  ARGBMirror(&orig_pixels[0][0], 0, &dst_pixels[0][0], 0, kPixels, 1);

  for (int i = 0; i < kPixels; ++i) {
    const int mirrored = kPixels - 1 - i;
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ((i / (c + 1)) & 255, dst_pixels[mirrored][c])
          << "pixel " << i << " channel " << c;
    }
  }
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    ARGBMirror(&orig_pixels[0][0], 0, &dst_pixels[0][0], 0, kPixels, 1);
  }
}
788
// Runs ARGBShade with three different shade values over known pixels and
// checks the output bytes, then times the call on a 1280 pixel row.
TEST_F(LibYUVPlanarTest, TestShade) {
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  SIMD_ALIGNED(uint8_t shade_pixels[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  static const uint8_t kInput[4][4] = {
      {10u, 20u, 40u, 80u},
      {0u, 0u, 0u, 255u},
      {0u, 0u, 0u, 0u},
      {0u, 0u, 0u, 0u},
  };
  // Expected output for shade value 0x80ffffff: bytes 0..2 are unchanged and
  // byte 3 comes back halved (80 -> 40, 255 -> 128).
  static const unsigned int kAlphaHalved[4][4] = {
      {10u, 20u, 40u, 40u},
      {0u, 0u, 0u, 128u},
      {0u, 0u, 0u, 0u},
      {0u, 0u, 0u, 0u},
  };
  // Expected first pixel for shade values 0x80808080 and 0x10204080.
  static const unsigned int kAllHalved[4] = {5u, 10u, 20u, 40u};
  static const unsigned int kPerChannel[4] = {5u, 5u, 5u, 5u};

  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[p][c] = kInput[p][c];
    }
  }
  // Do 8 pixels to allow opt version to be used.
  ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 8, 1, 0x80ffffff);
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kAlphaHalved[p][c], shade_pixels[p][c])
          << "pixel " << p << " channel " << c;
    }
  }

  ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 8, 1, 0x80808080);
  for (int c = 0; c < 4; ++c) {
    EXPECT_EQ(kAllHalved[c], shade_pixels[0][c]) << "channel " << c;
  }

  ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 8, 1, 0x10204080);
  for (int c = 0; c < 4; ++c) {
    EXPECT_EQ(kPerChannel[c], shade_pixels[0][c]) << "channel " << c;
  }

  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 1280, 1,
              0x80808080);
  }
}
846
// Blends two 4 pixel rows with ARGBInterpolate at interpolation values 128, 0
// and 192 and checks the output bytes, then times the call on 1280 pixels.
TEST_F(LibYUVPlanarTest, TestARGBInterpolate) {
  SIMD_ALIGNED(uint8_t orig_pixels_0[1280][4]);
  SIMD_ALIGNED(uint8_t orig_pixels_1[1280][4]);
  SIMD_ALIGNED(uint8_t interpolate_pixels[1280][4]);
  memset(orig_pixels_0, 0, sizeof(orig_pixels_0));
  memset(orig_pixels_1, 0, sizeof(orig_pixels_1));

  // First and second source rows.
  static const uint8_t kInput0[4][4] = {
      {16u, 32u, 64u, 128u},
      {0u, 0u, 0u, 255u},
      {0u, 0u, 0u, 0u},
      {0u, 0u, 0u, 0u},
  };
  static const uint8_t kInput1[4][4] = {
      {0u, 0u, 0u, 0u},
      {0u, 0u, 0u, 0u},
      {0u, 0u, 0u, 0u},
      {255u, 255u, 255u, 255u},
  };
  // Expected row for interpolation 128.
  static const unsigned int kBlend128[4][4] = {
      {8u, 16u, 32u, 64u},
      {0u, 0u, 0u, 128u},
      {0u, 0u, 0u, 0u},
      {128u, 128u, 128u, 128u},
  };
  // Expected first pixel for interpolation 0 (equal to the first source) and
  // for interpolation 192.
  static const unsigned int kBlend0[4] = {16u, 32u, 64u, 128u};
  static const unsigned int kBlend192[4] = {4u, 8u, 16u, 32u};

  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels_0[p][c] = kInput0[p][c];
    }
  }
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels_1[p][c] = kInput1[p][c];
    }
  }

  ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
                  &interpolate_pixels[0][0], 0, 4, 1, 128);
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kBlend128[p][c], interpolate_pixels[p][c])
          << "pixel " << p << " channel " << c;
    }
  }

  ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
                  &interpolate_pixels[0][0], 0, 4, 1, 0);
  for (int c = 0; c < 4; ++c) {
    EXPECT_EQ(kBlend0[c], interpolate_pixels[0][c]) << "channel " << c;
  }

  ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
                  &interpolate_pixels[0][0], 0, 4, 1, 192);
  for (int c = 0; c < 4; ++c) {
    EXPECT_EQ(kBlend192[c], interpolate_pixels[0][c]) << "channel " << c;
  }

  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
                    &interpolate_pixels[0][0], 0, 1280, 1, 128);
  }
}
927
// Checks InterpolatePlane() on a 16 byte row with a final argument of 128, 0
// and 192, then repeatedly runs it on a 1280 byte row for timing.
TEST_F(LibYUVPlanarTest, TestInterpolatePlane) {
  // Leading 16 bytes of each source row; all remaining bytes stay zero.
  static const uint8_t kHead0[16] = {16u, 32u, 64u, 128u, 0u, 0u, 0u, 255u,
                                     0u,  0u,  0u,  0u,   0u, 0u, 0u, 0u};
  static const uint8_t kHead1[16] = {0u, 0u, 0u, 0u, 0u,   0u,   0u,   0u,
                                     0u, 0u, 0u, 0u, 255u, 255u, 255u, 255u};
  // Output bytes the test expects for each value of the final argument.
  static const unsigned int kExpect128[16] = {
      8u, 16u, 32u, 64u, 0u,   0u,   0u,   128u,
      0u, 0u,  0u,  0u,  128u, 128u, 128u, 128u};
  static const unsigned int kExpect0[4] = {16u, 32u, 64u, 128u};
  static const unsigned int kExpect192[4] = {4u, 8u, 16u, 32u};

  SIMD_ALIGNED(uint8_t orig_pixels_0[1280]);
  SIMD_ALIGNED(uint8_t orig_pixels_1[1280]);
  SIMD_ALIGNED(uint8_t interpolate_pixels[1280]);
  memset(orig_pixels_0, 0, sizeof(orig_pixels_0));
  memset(orig_pixels_1, 0, sizeof(orig_pixels_1));
  for (int n = 0; n < 16; ++n) {
    orig_pixels_0[n] = kHead0[n];
    orig_pixels_1[n] = kHead1[n];
  }

  InterpolatePlane(orig_pixels_0, 0, orig_pixels_1, 0, interpolate_pixels, 0,
                   16, 1, 128);
  for (int n = 0; n < 16; ++n) {
    EXPECT_EQ(kExpect128[n], interpolate_pixels[n]);
  }

  InterpolatePlane(orig_pixels_0, 0, orig_pixels_1, 0, interpolate_pixels, 0,
                   16, 1, 0);
  for (int n = 0; n < 4; ++n) {
    EXPECT_EQ(kExpect0[n], interpolate_pixels[n]);
  }

  InterpolatePlane(orig_pixels_0, 0, orig_pixels_1, 0, interpolate_pixels, 0,
                   16, 1, 192);
  for (int n = 0; n < 4; ++n) {
    EXPECT_EQ(kExpect192[n], interpolate_pixels[n]);
  }

  // Timing loop over the full 1280 byte row; the result is not checked.
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    InterpolatePlane(orig_pixels_0, 0, orig_pixels_1, 0, interpolate_pixels, 0,
                     1280, 1, 123);
  }
}
1008
// Defines the test ARGBInterpolate<TERP><N>. The test fills two source buffers
// with random bytes, runs ARGBInterpolate() once after
// MaskCpuFlags(disable_cpu_flags_) and benchmark_iterations_ times after
// MaskCpuFlags(benchmark_cpu_info_), and expects the two destination buffers
// to be byte-for-byte equal. W1280 is the width (raised to at least 1), NEG is
// the sign placed in front of the height, and OFF is a byte offset applied to
// both source pointers. FMT_A and FMT_B are not referenced by the body.
#define TESTTERP(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, W1280, TERP, \
                 N, NEG, OFF)                                                 \
  TEST_F(LibYUVPlanarTest, ARGBInterpolate##TERP##N) {                        \
    const int kCols = ((W1280) < 1) ? 1 : (W1280);                            \
    const int kRows = benchmark_height_;                                      \
    const int kSrcPitch =                                                     \
        (kCols * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A;                 \
    const int kDstPitch =                                                     \
        (kCols * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B;                 \
    const int kSrcBytes = kSrcPitch * kRows;                                  \
    const int kDstBytes = kDstPitch * kRows;                                  \
    const int kSignedRows = NEG kRows;                                        \
    align_buffer_page_end(src_argb_a, kSrcBytes + OFF);                       \
    align_buffer_page_end(src_argb_b, kSrcBytes + OFF);                       \
    align_buffer_page_end(dst_argb_c, kDstBytes);                             \
    align_buffer_page_end(dst_argb_opt, kDstBytes);                           \
    for (int n = 0; n < kSrcBytes; ++n) {                                     \
      src_argb_a[n + OFF] = (fastrand() & 0xff);                              \
      src_argb_b[n + OFF] = (fastrand() & 0xff);                              \
    }                                                                         \
    MaskCpuFlags(disable_cpu_flags_);                                         \
    ARGBInterpolate(src_argb_a + OFF, kSrcPitch, src_argb_b + OFF, kSrcPitch, \
                    dst_argb_c, kDstPitch, kCols, kSignedRows, TERP);         \
    MaskCpuFlags(benchmark_cpu_info_);                                        \
    for (int run = 0; run < benchmark_iterations_; ++run) {                   \
      ARGBInterpolate(src_argb_a + OFF, kSrcPitch, src_argb_b + OFF,          \
                      kSrcPitch, dst_argb_opt, kDstPitch, kCols, kSignedRows, \
                      TERP);                                                  \
    }                                                                         \
    for (int n = 0; n < kDstBytes; ++n) {                                     \
      EXPECT_EQ(dst_argb_c[n], dst_argb_opt[n]);                              \
    }                                                                         \
    free_aligned_buffer_page_end(src_argb_a);                                 \
    free_aligned_buffer_page_end(src_argb_b);                                 \
    free_aligned_buffer_page_end(dst_argb_c);                                 \
    free_aligned_buffer_page_end(dst_argb_opt);                               \
  }
1042
// Instantiates four TESTTERP tests for one TERP value:
//   _Any       - width is benchmark_width_ - 1.
//   _Unaligned - OFF is 1, so both source pointers are offset by one byte.
//   _Invert    - NEG is '-', so a negated height is passed.
//   _Opt       - full width, positive height, no offset.
#define TESTINTERPOLATE(TERP)                                                \
  TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_ - 1, TERP, _Any, +, 0)   \
  TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Unaligned, +, 1) \
  TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Invert, -, 0)    \
  TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Opt, +, 0)
1048
// Generate the ARGBInterpolate tests for five TERP values (0, 64, 128, 192
// and 255); each line expands to four tests.
TESTINTERPOLATE(0)
TESTINTERPOLATE(64)
TESTINTERPOLATE(128)
TESTINTERPOLATE(192)
TESTINTERPOLATE(255)
1054
TestBlend(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1055 static int TestBlend(int width,
1056 int height,
1057 int benchmark_iterations,
1058 int disable_cpu_flags,
1059 int benchmark_cpu_info,
1060 int invert,
1061 int off) {
1062 if (width < 1) {
1063 width = 1;
1064 }
1065 const int kBpp = 4;
1066 const int kStride = width * kBpp;
1067 align_buffer_page_end(src_argb_a, kStride * height + off);
1068 align_buffer_page_end(src_argb_b, kStride * height + off);
1069 align_buffer_page_end(dst_argb_c, kStride * height);
1070 align_buffer_page_end(dst_argb_opt, kStride * height);
1071 for (int i = 0; i < kStride * height; ++i) {
1072 src_argb_a[i + off] = (fastrand() & 0xff);
1073 src_argb_b[i + off] = (fastrand() & 0xff);
1074 }
1075 ARGBAttenuate(src_argb_a + off, kStride, src_argb_a + off, kStride, width,
1076 height);
1077 ARGBAttenuate(src_argb_b + off, kStride, src_argb_b + off, kStride, width,
1078 height);
1079 memset(dst_argb_c, 255, kStride * height);
1080 memset(dst_argb_opt, 255, kStride * height);
1081
1082 MaskCpuFlags(disable_cpu_flags);
1083 ARGBBlend(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
1084 kStride, width, invert * height);
1085 MaskCpuFlags(benchmark_cpu_info);
1086 for (int i = 0; i < benchmark_iterations; ++i) {
1087 ARGBBlend(src_argb_a + off, kStride, src_argb_b + off, kStride,
1088 dst_argb_opt, kStride, width, invert * height);
1089 }
1090 int max_diff = 0;
1091 for (int i = 0; i < kStride * height; ++i) {
1092 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1093 static_cast<int>(dst_argb_opt[i]));
1094 if (abs_diff > max_diff) {
1095 max_diff = abs_diff;
1096 }
1097 }
1098 free_aligned_buffer_page_end(src_argb_a);
1099 free_aligned_buffer_page_end(src_argb_b);
1100 free_aligned_buffer_page_end(dst_argb_c);
1101 free_aligned_buffer_page_end(dst_argb_opt);
1102 return max_diff;
1103 }
1104
// TestBlend with the width reduced by 4; tolerates a byte difference of 1.
TEST_F(LibYUVPlanarTest, ARGBBlend_Any) {
  const int kWidth = benchmark_width_ - 4;
  const int diff =
      TestBlend(kWidth, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  EXPECT_LE(diff, 1);
}
1111
// TestBlend with a source offset of 1 byte; tolerates a byte difference of 1.
TEST_F(LibYUVPlanarTest, ARGBBlend_Unaligned) {
  const int kOffset = 1;
  const int diff =
      TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, +1, kOffset);
  EXPECT_LE(diff, 1);
}
1118
// TestBlend with invert of -1 (negated height); tolerates a byte difference
// of 1.
TEST_F(LibYUVPlanarTest, ARGBBlend_Invert) {
  const int kInvert = -1;
  const int diff =
      TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, 0);
  EXPECT_LE(diff, 1);
}
1125
// TestBlend at full benchmark size with no offset; tolerates a byte
// difference of 1.
TEST_F(LibYUVPlanarTest, ARGBBlend_Opt) {
  const int diff =
      TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  EXPECT_LE(diff, 1);
}
1132
TestBlendPlane(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1133 static void TestBlendPlane(int width,
1134 int height,
1135 int benchmark_iterations,
1136 int disable_cpu_flags,
1137 int benchmark_cpu_info,
1138 int invert,
1139 int off) {
1140 if (width < 1) {
1141 width = 1;
1142 }
1143 const int kBpp = 1;
1144 const int kStride = width * kBpp;
1145 align_buffer_page_end(src_argb_a, kStride * height + off);
1146 align_buffer_page_end(src_argb_b, kStride * height + off);
1147 align_buffer_page_end(src_argb_alpha, kStride * height + off);
1148 align_buffer_page_end(dst_argb_c, kStride * height + off);
1149 align_buffer_page_end(dst_argb_opt, kStride * height + off);
1150 memset(dst_argb_c, 255, kStride * height + off);
1151 memset(dst_argb_opt, 255, kStride * height + off);
1152
1153 // Test source is maintained exactly if alpha is 255.
1154 for (int i = 0; i < width; ++i) {
1155 src_argb_a[i + off] = i & 255;
1156 src_argb_b[i + off] = 255 - (i & 255);
1157 }
1158 memset(src_argb_alpha + off, 255, width);
1159 BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
1160 src_argb_alpha + off, width, dst_argb_opt + off, width, width, 1);
1161 for (int i = 0; i < width; ++i) {
1162 EXPECT_EQ(src_argb_a[i + off], dst_argb_opt[i + off]);
1163 }
1164 // Test destination is maintained exactly if alpha is 0.
1165 memset(src_argb_alpha + off, 0, width);
1166 BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
1167 src_argb_alpha + off, width, dst_argb_opt + off, width, width, 1);
1168 for (int i = 0; i < width; ++i) {
1169 EXPECT_EQ(src_argb_b[i + off], dst_argb_opt[i + off]);
1170 }
1171 for (int i = 0; i < kStride * height; ++i) {
1172 src_argb_a[i + off] = (fastrand() & 0xff);
1173 src_argb_b[i + off] = (fastrand() & 0xff);
1174 src_argb_alpha[i + off] = (fastrand() & 0xff);
1175 }
1176
1177 MaskCpuFlags(disable_cpu_flags);
1178 BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
1179 src_argb_alpha + off, width, dst_argb_c + off, width, width,
1180 invert * height);
1181 MaskCpuFlags(benchmark_cpu_info);
1182 for (int i = 0; i < benchmark_iterations; ++i) {
1183 BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
1184 src_argb_alpha + off, width, dst_argb_opt + off, width, width,
1185 invert * height);
1186 }
1187 for (int i = 0; i < kStride * height; ++i) {
1188 EXPECT_EQ(dst_argb_c[i + off], dst_argb_opt[i + off]);
1189 }
1190 free_aligned_buffer_page_end(src_argb_a);
1191 free_aligned_buffer_page_end(src_argb_b);
1192 free_aligned_buffer_page_end(src_argb_alpha);
1193 free_aligned_buffer_page_end(dst_argb_c);
1194 free_aligned_buffer_page_end(dst_argb_opt);
1195 }
1196
// TestBlendPlane at full benchmark size, positive height, no offset.
TEST_F(LibYUVPlanarTest, BlendPlane_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
// TestBlendPlane at full benchmark size with every buffer offset by 1 byte.
TEST_F(LibYUVPlanarTest, BlendPlane_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
// TestBlendPlane with the width reduced by 4 and every buffer offset by 1
// byte.
TEST_F(LibYUVPlanarTest, BlendPlane_Any) {
  const int kWidth = benchmark_width_ - 4;
  const int kOffset = 1;
  TestBlendPlane(kWidth, benchmark_height_, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_, +1, kOffset);
}
// TestBlendPlane with invert of -1 (negated height) and every buffer offset
// by 1 byte.
TEST_F(LibYUVPlanarTest, BlendPlane_Invert) {
  const int kInvert = -1;
  const int kOffset = 1;
  TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
1213
// Divides v by a, rounding up (for non-negative v and positive a).
#define SUBSAMPLE(v, a) (((v) + (a) - 1) / (a))
1215
TestI420Blend(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1216 static void TestI420Blend(int width,
1217 int height,
1218 int benchmark_iterations,
1219 int disable_cpu_flags,
1220 int benchmark_cpu_info,
1221 int invert,
1222 int off) {
1223 width = ((width) > 0) ? (width) : 1;
1224 const int kStrideUV = SUBSAMPLE(width, 2);
1225 const int kSizeUV = kStrideUV * SUBSAMPLE(height, 2);
1226 align_buffer_page_end(src_y0, width * height + off);
1227 align_buffer_page_end(src_u0, kSizeUV + off);
1228 align_buffer_page_end(src_v0, kSizeUV + off);
1229 align_buffer_page_end(src_y1, width * height + off);
1230 align_buffer_page_end(src_u1, kSizeUV + off);
1231 align_buffer_page_end(src_v1, kSizeUV + off);
1232 align_buffer_page_end(src_a, width * height + off);
1233 align_buffer_page_end(dst_y_c, width * height + off);
1234 align_buffer_page_end(dst_u_c, kSizeUV + off);
1235 align_buffer_page_end(dst_v_c, kSizeUV + off);
1236 align_buffer_page_end(dst_y_opt, width * height + off);
1237 align_buffer_page_end(dst_u_opt, kSizeUV + off);
1238 align_buffer_page_end(dst_v_opt, kSizeUV + off);
1239
1240 MemRandomize(src_y0, width * height + off);
1241 MemRandomize(src_u0, kSizeUV + off);
1242 MemRandomize(src_v0, kSizeUV + off);
1243 MemRandomize(src_y1, width * height + off);
1244 MemRandomize(src_u1, kSizeUV + off);
1245 MemRandomize(src_v1, kSizeUV + off);
1246 MemRandomize(src_a, width * height + off);
1247 memset(dst_y_c, 255, width * height + off);
1248 memset(dst_u_c, 255, kSizeUV + off);
1249 memset(dst_v_c, 255, kSizeUV + off);
1250 memset(dst_y_opt, 255, width * height + off);
1251 memset(dst_u_opt, 255, kSizeUV + off);
1252 memset(dst_v_opt, 255, kSizeUV + off);
1253
1254 MaskCpuFlags(disable_cpu_flags);
1255 I420Blend(src_y0 + off, width, src_u0 + off, kStrideUV, src_v0 + off,
1256 kStrideUV, src_y1 + off, width, src_u1 + off, kStrideUV,
1257 src_v1 + off, kStrideUV, src_a + off, width, dst_y_c + off, width,
1258 dst_u_c + off, kStrideUV, dst_v_c + off, kStrideUV, width,
1259 invert * height);
1260 MaskCpuFlags(benchmark_cpu_info);
1261 for (int i = 0; i < benchmark_iterations; ++i) {
1262 I420Blend(src_y0 + off, width, src_u0 + off, kStrideUV, src_v0 + off,
1263 kStrideUV, src_y1 + off, width, src_u1 + off, kStrideUV,
1264 src_v1 + off, kStrideUV, src_a + off, width, dst_y_opt + off,
1265 width, dst_u_opt + off, kStrideUV, dst_v_opt + off, kStrideUV,
1266 width, invert * height);
1267 }
1268 for (int i = 0; i < width * height; ++i) {
1269 EXPECT_EQ(dst_y_c[i + off], dst_y_opt[i + off]);
1270 }
1271 for (int i = 0; i < kSizeUV; ++i) {
1272 EXPECT_EQ(dst_u_c[i + off], dst_u_opt[i + off]);
1273 EXPECT_EQ(dst_v_c[i + off], dst_v_opt[i + off]);
1274 }
1275 free_aligned_buffer_page_end(src_y0);
1276 free_aligned_buffer_page_end(src_u0);
1277 free_aligned_buffer_page_end(src_v0);
1278 free_aligned_buffer_page_end(src_y1);
1279 free_aligned_buffer_page_end(src_u1);
1280 free_aligned_buffer_page_end(src_v1);
1281 free_aligned_buffer_page_end(src_a);
1282 free_aligned_buffer_page_end(dst_y_c);
1283 free_aligned_buffer_page_end(dst_u_c);
1284 free_aligned_buffer_page_end(dst_v_c);
1285 free_aligned_buffer_page_end(dst_y_opt);
1286 free_aligned_buffer_page_end(dst_u_opt);
1287 free_aligned_buffer_page_end(dst_v_opt);
1288 }
1289
// TestI420Blend at full benchmark size, positive height, no offset.
TEST_F(LibYUVPlanarTest, I420Blend_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
// TestI420Blend at full benchmark size with every plane offset by 1 byte.
TEST_F(LibYUVPlanarTest, I420Blend_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
1298
1299 // TODO(fbarchard): DISABLED because _Any uses C. Avoid C and re-enable.
TEST_F(LibYUVPlanarTest,DISABLED_I420Blend_Any)1300 TEST_F(LibYUVPlanarTest, DISABLED_I420Blend_Any) {
1301 TestI420Blend(benchmark_width_ - 4, benchmark_height_, benchmark_iterations_,
1302 disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
1303 }
// TestI420Blend with invert of -1 (negated height) and no offset.
TEST_F(LibYUVPlanarTest, I420Blend_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
1308
// Checks ARGBAffineRow_C() against three spot values. When the SSE2 row
// function is compiled in (HAS_ARGBAFFINEROW_SSE2) and the CPU reports SSE2 at
// runtime, also checks that ARGBAffineRow_SSE2() produces exactly the same
// 1280 pixels and then runs it benchmark_pixels_div1280_ times for timing.
TEST_F(LibYUVPlanarTest, TestAffine) {
  SIMD_ALIGNED(uint8_t orig_pixels_0[1280][4]);
  SIMD_ALIGNED(uint8_t interpolate_pixels_C[1280][4]);

  for (int i = 0; i < 1280; ++i) {
    for (int j = 0; j < 4; ++j) {
      // Only the low 8 bits of the pixel index are stored, so the pattern
      // repeats every 256 pixels.
      orig_pixels_0[i][j] = static_cast<uint8_t>(i);
    }
  }

  float uv_step[4] = {0.f, 0.f, 0.75f, 0.f};

  ARGBAffineRow_C(&orig_pixels_0[0][0], 0, &interpolate_pixels_C[0][0], uv_step,
                  1280);
  EXPECT_EQ(0u, interpolate_pixels_C[0][0]);
  EXPECT_EQ(96u, interpolate_pixels_C[128][0]);
  EXPECT_EQ(191u, interpolate_pixels_C[255][3]);

#if defined(HAS_ARGBAFFINEROW_SSE2)
  // HAS_ARGBAFFINEROW_SSE2 is a compile-time switch only. Check the CPU at
  // runtime before executing any SSE2 code, so the comparison is skipped
  // instead of running the SSE2 routine on a CPU that does not report SSE2.
  if (TestCpuFlag(kCpuHasSSE2)) {
    SIMD_ALIGNED(uint8_t interpolate_pixels_Opt[1280][4]);
    ARGBAffineRow_SSE2(&orig_pixels_0[0][0], 0, &interpolate_pixels_Opt[0][0],
                       uv_step, 1280);
    EXPECT_EQ(0,
              memcmp(interpolate_pixels_Opt, interpolate_pixels_C, 1280 * 4));

    for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
      ARGBAffineRow_SSE2(&orig_pixels_0[0][0], 0, &interpolate_pixels_Opt[0][0],
                         uv_step, 1280);
    }
  }
#endif
}
1342
// Runs CopyPlane() from a source plane with a 12 byte border into two
// identically pre-filled destination buffers, once per benchmark iteration
// after MaskCpuFlags(disable_cpu_flags_) and again after
// MaskCpuFlags(benchmark_cpu_info_), then expects the two destination buffers
// to be identical over their whole length.
TEST_F(LibYUVPlanarTest, TestCopyPlane) {
  const int kBorder = 12;
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kPaddedWidth = kWidth + kBorder * 2;
  const int kPlaneSize = kPaddedWidth * (kHeight + kBorder * 2);

  align_buffer_page_end(orig_y, kPlaneSize);
  align_buffer_page_end(dst_c, kPlaneSize);
  align_buffer_page_end(dst_opt, kPlaneSize);

  memset(orig_y, 0, kPlaneSize);
  memset(dst_c, 0, kPlaneSize);
  memset(dst_opt, 0, kPlaneSize);

  // Random data inside the border of the source; the border itself stays 0.
  for (int row = kBorder; row < kHeight + kBorder; ++row) {
    uint8_t* const row_ptr = orig_y + row * kPaddedWidth;
    for (int col = kBorder; col < kWidth + kBorder; ++col) {
      row_ptr[col] = fastrand() & 0xff;
    }
  }

  // Both destinations start with the same random (7 bit) contents.
  for (int n = 0; n < kPlaneSize; ++n) {
    const uint8_t value = fastrand() & 0x7f;
    dst_c[n] = value;
    dst_opt[n] = value;
  }

  const int kInteriorOffset = kBorder * kPaddedWidth + kBorder;
  const int kSrcStride = kPaddedWidth;
  // NOTE(review): the destination stride is fixed at 8 regardless of the
  // width, so destination rows overlap whenever the width exceeds 8 - confirm
  // this is intended.
  const int kDstStride = 8;

  // Disable all optimizations.
  MaskCpuFlags(disable_cpu_flags_);
  for (int run = 0; run < benchmark_iterations_; ++run) {
    CopyPlane(orig_y + kInteriorOffset, kSrcStride, dst_c + kInteriorOffset,
              kDstStride, kWidth, kHeight);
  }

  // Enable optimizations.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int run = 0; run < benchmark_iterations_; ++run) {
    CopyPlane(orig_y + kInteriorOffset, kSrcStride, dst_opt + kInteriorOffset,
              kDstStride, kWidth, kHeight);
  }

  int mismatches = 0;
  for (int n = 0; n < kPlaneSize; ++n) {
    mismatches += (dst_c[n] != dst_opt[n]) ? 1 : 0;
  }

  free_aligned_buffer_page_end(orig_y);
  free_aligned_buffer_page_end(dst_c);
  free_aligned_buffer_page_end(dst_opt);

  EXPECT_EQ(0, mismatches);
}
1402
TestMultiply(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1403 static int TestMultiply(int width,
1404 int height,
1405 int benchmark_iterations,
1406 int disable_cpu_flags,
1407 int benchmark_cpu_info,
1408 int invert,
1409 int off) {
1410 if (width < 1) {
1411 width = 1;
1412 }
1413 const int kBpp = 4;
1414 const int kStride = width * kBpp;
1415 align_buffer_page_end(src_argb_a, kStride * height + off);
1416 align_buffer_page_end(src_argb_b, kStride * height + off);
1417 align_buffer_page_end(dst_argb_c, kStride * height);
1418 align_buffer_page_end(dst_argb_opt, kStride * height);
1419 for (int i = 0; i < kStride * height; ++i) {
1420 src_argb_a[i + off] = (fastrand() & 0xff);
1421 src_argb_b[i + off] = (fastrand() & 0xff);
1422 }
1423 memset(dst_argb_c, 0, kStride * height);
1424 memset(dst_argb_opt, 0, kStride * height);
1425
1426 MaskCpuFlags(disable_cpu_flags);
1427 ARGBMultiply(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
1428 kStride, width, invert * height);
1429 MaskCpuFlags(benchmark_cpu_info);
1430 for (int i = 0; i < benchmark_iterations; ++i) {
1431 ARGBMultiply(src_argb_a + off, kStride, src_argb_b + off, kStride,
1432 dst_argb_opt, kStride, width, invert * height);
1433 }
1434 int max_diff = 0;
1435 for (int i = 0; i < kStride * height; ++i) {
1436 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1437 static_cast<int>(dst_argb_opt[i]));
1438 if (abs_diff > max_diff) {
1439 max_diff = abs_diff;
1440 }
1441 }
1442 free_aligned_buffer_page_end(src_argb_a);
1443 free_aligned_buffer_page_end(src_argb_b);
1444 free_aligned_buffer_page_end(dst_argb_c);
1445 free_aligned_buffer_page_end(dst_argb_opt);
1446 return max_diff;
1447 }
1448
// TestMultiply with the width reduced by 1; tolerates a byte difference of 1.
TEST_F(LibYUVPlanarTest, ARGBMultiply_Any) {
  const int kWidth = benchmark_width_ - 1;
  const int diff =
      TestMultiply(kWidth, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  EXPECT_LE(diff, 1);
}
1455
// TestMultiply with a source offset of 1 byte; tolerates a byte difference
// of 1.
TEST_F(LibYUVPlanarTest, ARGBMultiply_Unaligned) {
  const int kOffset = 1;
  const int diff =
      TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, +1, kOffset);
  EXPECT_LE(diff, 1);
}
1462
// TestMultiply with invert of -1 (negated height); tolerates a byte
// difference of 1.
TEST_F(LibYUVPlanarTest, ARGBMultiply_Invert) {
  const int kInvert = -1;
  const int diff =
      TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, kInvert, 0);
  EXPECT_LE(diff, 1);
}
1469
// TestMultiply at full benchmark size with no offset; tolerates a byte
// difference of 1.
TEST_F(LibYUVPlanarTest, ARGBMultiply_Opt) {
  const int diff =
      TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  EXPECT_LE(diff, 1);
}
1476
TestAdd(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1477 static int TestAdd(int width,
1478 int height,
1479 int benchmark_iterations,
1480 int disable_cpu_flags,
1481 int benchmark_cpu_info,
1482 int invert,
1483 int off) {
1484 if (width < 1) {
1485 width = 1;
1486 }
1487 const int kBpp = 4;
1488 const int kStride = width * kBpp;
1489 align_buffer_page_end(src_argb_a, kStride * height + off);
1490 align_buffer_page_end(src_argb_b, kStride * height + off);
1491 align_buffer_page_end(dst_argb_c, kStride * height);
1492 align_buffer_page_end(dst_argb_opt, kStride * height);
1493 for (int i = 0; i < kStride * height; ++i) {
1494 src_argb_a[i + off] = (fastrand() & 0xff);
1495 src_argb_b[i + off] = (fastrand() & 0xff);
1496 }
1497 memset(dst_argb_c, 0, kStride * height);
1498 memset(dst_argb_opt, 0, kStride * height);
1499
1500 MaskCpuFlags(disable_cpu_flags);
1501 ARGBAdd(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
1502 kStride, width, invert * height);
1503 MaskCpuFlags(benchmark_cpu_info);
1504 for (int i = 0; i < benchmark_iterations; ++i) {
1505 ARGBAdd(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_opt,
1506 kStride, width, invert * height);
1507 }
1508 int max_diff = 0;
1509 for (int i = 0; i < kStride * height; ++i) {
1510 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1511 static_cast<int>(dst_argb_opt[i]));
1512 if (abs_diff > max_diff) {
1513 max_diff = abs_diff;
1514 }
1515 }
1516 free_aligned_buffer_page_end(src_argb_a);
1517 free_aligned_buffer_page_end(src_argb_b);
1518 free_aligned_buffer_page_end(dst_argb_c);
1519 free_aligned_buffer_page_end(dst_argb_opt);
1520 return max_diff;
1521 }
1522
// TestAdd with the width reduced by 1; tolerates a byte difference of 1.
TEST_F(LibYUVPlanarTest, ARGBAdd_Any) {
  const int kWidth = benchmark_width_ - 1;
  const int diff = TestAdd(kWidth, benchmark_height_, benchmark_iterations_,
                           disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  EXPECT_LE(diff, 1);
}
1529
// TestAdd with a source offset of 1 byte; tolerates a byte difference of 1.
TEST_F(LibYUVPlanarTest, ARGBAdd_Unaligned) {
  const int kOffset = 1;
  const int diff =
      TestAdd(benchmark_width_, benchmark_height_, benchmark_iterations_,
              disable_cpu_flags_, benchmark_cpu_info_, +1, kOffset);
  EXPECT_LE(diff, 1);
}
1536
// TestAdd with invert of -1 (negated height); tolerates a byte difference
// of 1.
TEST_F(LibYUVPlanarTest, ARGBAdd_Invert) {
  const int kInvert = -1;
  const int diff =
      TestAdd(benchmark_width_, benchmark_height_, benchmark_iterations_,
              disable_cpu_flags_, benchmark_cpu_info_, kInvert, 0);
  EXPECT_LE(diff, 1);
}
1543
// TestAdd at full benchmark size with no offset; tolerates a byte difference
// of 1.
TEST_F(LibYUVPlanarTest, ARGBAdd_Opt) {
  const int diff =
      TestAdd(benchmark_width_, benchmark_height_, benchmark_iterations_,
              disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  EXPECT_LE(diff, 1);
}
1550
TestSubtract(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1551 static int TestSubtract(int width,
1552 int height,
1553 int benchmark_iterations,
1554 int disable_cpu_flags,
1555 int benchmark_cpu_info,
1556 int invert,
1557 int off) {
1558 if (width < 1) {
1559 width = 1;
1560 }
1561 const int kBpp = 4;
1562 const int kStride = width * kBpp;
1563 align_buffer_page_end(src_argb_a, kStride * height + off);
1564 align_buffer_page_end(src_argb_b, kStride * height + off);
1565 align_buffer_page_end(dst_argb_c, kStride * height);
1566 align_buffer_page_end(dst_argb_opt, kStride * height);
1567 for (int i = 0; i < kStride * height; ++i) {
1568 src_argb_a[i + off] = (fastrand() & 0xff);
1569 src_argb_b[i + off] = (fastrand() & 0xff);
1570 }
1571 memset(dst_argb_c, 0, kStride * height);
1572 memset(dst_argb_opt, 0, kStride * height);
1573
1574 MaskCpuFlags(disable_cpu_flags);
1575 ARGBSubtract(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
1576 kStride, width, invert * height);
1577 MaskCpuFlags(benchmark_cpu_info);
1578 for (int i = 0; i < benchmark_iterations; ++i) {
1579 ARGBSubtract(src_argb_a + off, kStride, src_argb_b + off, kStride,
1580 dst_argb_opt, kStride, width, invert * height);
1581 }
1582 int max_diff = 0;
1583 for (int i = 0; i < kStride * height; ++i) {
1584 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1585 static_cast<int>(dst_argb_opt[i]));
1586 if (abs_diff > max_diff) {
1587 max_diff = abs_diff;
1588 }
1589 }
1590 free_aligned_buffer_page_end(src_argb_a);
1591 free_aligned_buffer_page_end(src_argb_b);
1592 free_aligned_buffer_page_end(dst_argb_c);
1593 free_aligned_buffer_page_end(dst_argb_opt);
1594 return max_diff;
1595 }
1596
// TestSubtract with the width reduced by 1; tolerates a byte difference of 1.
TEST_F(LibYUVPlanarTest, ARGBSubtract_Any) {
  const int kWidth = benchmark_width_ - 1;
  const int diff =
      TestSubtract(kWidth, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  EXPECT_LE(diff, 1);
}
1603
// TestSubtract with a source offset of 1 byte; tolerates a byte difference
// of 1.
TEST_F(LibYUVPlanarTest, ARGBSubtract_Unaligned) {
  const int kOffset = 1;
  const int diff =
      TestSubtract(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, +1, kOffset);
  EXPECT_LE(diff, 1);
}
1610
// TestSubtract with invert of -1 (negated height); tolerates a byte
// difference of 1.
TEST_F(LibYUVPlanarTest, ARGBSubtract_Invert) {
  const int kInvert = -1;
  const int diff =
      TestSubtract(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, kInvert, 0);
  EXPECT_LE(diff, 1);
}
1617
// TestSubtract at full benchmark size with no offset; tolerates a byte
// difference of 1.
TEST_F(LibYUVPlanarTest, ARGBSubtract_Opt) {
  const int diff =
      TestSubtract(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  EXPECT_LE(diff, 1);
}
1624
TestSobel(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1625 static int TestSobel(int width,
1626 int height,
1627 int benchmark_iterations,
1628 int disable_cpu_flags,
1629 int benchmark_cpu_info,
1630 int invert,
1631 int off) {
1632 if (width < 1) {
1633 width = 1;
1634 }
1635 const int kBpp = 4;
1636 const int kStride = width * kBpp;
1637 align_buffer_page_end(src_argb_a, kStride * height + off);
1638 align_buffer_page_end(dst_argb_c, kStride * height);
1639 align_buffer_page_end(dst_argb_opt, kStride * height);
1640 memset(src_argb_a, 0, kStride * height + off);
1641 for (int i = 0; i < kStride * height; ++i) {
1642 src_argb_a[i + off] = (fastrand() & 0xff);
1643 }
1644 memset(dst_argb_c, 0, kStride * height);
1645 memset(dst_argb_opt, 0, kStride * height);
1646
1647 MaskCpuFlags(disable_cpu_flags);
1648 ARGBSobel(src_argb_a + off, kStride, dst_argb_c, kStride, width,
1649 invert * height);
1650 MaskCpuFlags(benchmark_cpu_info);
1651 for (int i = 0; i < benchmark_iterations; ++i) {
1652 ARGBSobel(src_argb_a + off, kStride, dst_argb_opt, kStride, width,
1653 invert * height);
1654 }
1655 int max_diff = 0;
1656 for (int i = 0; i < kStride * height; ++i) {
1657 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1658 static_cast<int>(dst_argb_opt[i]));
1659 if (abs_diff > max_diff) {
1660 max_diff = abs_diff;
1661 }
1662 }
1663 free_aligned_buffer_page_end(src_argb_a);
1664 free_aligned_buffer_page_end(dst_argb_c);
1665 free_aligned_buffer_page_end(dst_argb_opt);
1666 return max_diff;
1667 }
1668
// TestSobel with the width reduced by 1; requires identical output.
TEST_F(LibYUVPlanarTest, ARGBSobel_Any) {
  const int kWidth = benchmark_width_ - 1;
  const int diff = TestSobel(kWidth, benchmark_height_, benchmark_iterations_,
                             disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  EXPECT_EQ(0, diff);
}
1675
// TestSobel with a source offset of 1 byte; requires identical output.
TEST_F(LibYUVPlanarTest, ARGBSobel_Unaligned) {
  const int kOffset = 1;
  const int diff =
      TestSobel(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, +1, kOffset);
  EXPECT_EQ(0, diff);
}
1682
// TestSobel with invert of -1 (negated height); requires identical output.
TEST_F(LibYUVPlanarTest, ARGBSobel_Invert) {
  const int kInvert = -1;
  const int diff =
      TestSobel(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, 0);
  EXPECT_EQ(0, diff);
}
1689
// TestSobel at full benchmark size with no offset; requires identical output.
TEST_F(LibYUVPlanarTest, ARGBSobel_Opt) {
  const int diff =
      TestSobel(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  EXPECT_EQ(0, diff);
}
1696
TestSobelToPlane(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1697 static int TestSobelToPlane(int width,
1698 int height,
1699 int benchmark_iterations,
1700 int disable_cpu_flags,
1701 int benchmark_cpu_info,
1702 int invert,
1703 int off) {
1704 if (width < 1) {
1705 width = 1;
1706 }
1707 const int kSrcBpp = 4;
1708 const int kDstBpp = 1;
1709 const int kSrcStride = (width * kSrcBpp + 15) & ~15;
1710 const int kDstStride = (width * kDstBpp + 15) & ~15;
1711 align_buffer_page_end(src_argb_a, kSrcStride * height + off);
1712 align_buffer_page_end(dst_argb_c, kDstStride * height);
1713 align_buffer_page_end(dst_argb_opt, kDstStride * height);
1714 memset(src_argb_a, 0, kSrcStride * height + off);
1715 for (int i = 0; i < kSrcStride * height; ++i) {
1716 src_argb_a[i + off] = (fastrand() & 0xff);
1717 }
1718 memset(dst_argb_c, 0, kDstStride * height);
1719 memset(dst_argb_opt, 0, kDstStride * height);
1720
1721 MaskCpuFlags(disable_cpu_flags);
1722 ARGBSobelToPlane(src_argb_a + off, kSrcStride, dst_argb_c, kDstStride, width,
1723 invert * height);
1724 MaskCpuFlags(benchmark_cpu_info);
1725 for (int i = 0; i < benchmark_iterations; ++i) {
1726 ARGBSobelToPlane(src_argb_a + off, kSrcStride, dst_argb_opt, kDstStride,
1727 width, invert * height);
1728 }
1729 int max_diff = 0;
1730 for (int i = 0; i < kDstStride * height; ++i) {
1731 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1732 static_cast<int>(dst_argb_opt[i]));
1733 if (abs_diff > max_diff) {
1734 max_diff = abs_diff;
1735 }
1736 }
1737 free_aligned_buffer_page_end(src_argb_a);
1738 free_aligned_buffer_page_end(dst_argb_c);
1739 free_aligned_buffer_page_end(dst_argb_opt);
1740 return max_diff;
1741 }
1742
// ARGBSobelToPlane with a width one less than the benchmark width, invert +1
// and no source offset; both runs must match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Any) {
  const int max_diff =
      TestSobelToPlane(benchmark_width_ - 1, benchmark_height_,
                       benchmark_iterations_, disable_cpu_flags_,
                       benchmark_cpu_info_, /*invert=*/+1, /*off=*/0);
  EXPECT_EQ(0, max_diff);
}
1749
// ARGBSobelToPlane at the benchmark size with the source pointer offset by
// one byte; both runs must match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Unaligned) {
  const int max_diff =
      TestSobelToPlane(benchmark_width_, benchmark_height_,
                       benchmark_iterations_, disable_cpu_flags_,
                       benchmark_cpu_info_, /*invert=*/+1, /*off=*/1);
  EXPECT_EQ(0, max_diff);
}
1756
// ARGBSobelToPlane at the benchmark size with invert -1 (a negative height is
// handed to the library); both runs must match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Invert) {
  const int max_diff =
      TestSobelToPlane(benchmark_width_, benchmark_height_,
                       benchmark_iterations_, disable_cpu_flags_,
                       benchmark_cpu_info_, /*invert=*/-1, /*off=*/0);
  EXPECT_EQ(0, max_diff);
}
1763
// ARGBSobelToPlane at the benchmark size, invert +1, no source offset; both
// runs must match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Opt) {
  const int max_diff =
      TestSobelToPlane(benchmark_width_, benchmark_height_,
                       benchmark_iterations_, disable_cpu_flags_,
                       benchmark_cpu_info_, /*invert=*/+1, /*off=*/0);
  EXPECT_EQ(0, max_diff);
}
1770
TestSobelXY(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1771 static int TestSobelXY(int width,
1772 int height,
1773 int benchmark_iterations,
1774 int disable_cpu_flags,
1775 int benchmark_cpu_info,
1776 int invert,
1777 int off) {
1778 if (width < 1) {
1779 width = 1;
1780 }
1781 const int kBpp = 4;
1782 const int kStride = width * kBpp;
1783 align_buffer_page_end(src_argb_a, kStride * height + off);
1784 align_buffer_page_end(dst_argb_c, kStride * height);
1785 align_buffer_page_end(dst_argb_opt, kStride * height);
1786 memset(src_argb_a, 0, kStride * height + off);
1787 for (int i = 0; i < kStride * height; ++i) {
1788 src_argb_a[i + off] = (fastrand() & 0xff);
1789 }
1790 memset(dst_argb_c, 0, kStride * height);
1791 memset(dst_argb_opt, 0, kStride * height);
1792
1793 MaskCpuFlags(disable_cpu_flags);
1794 ARGBSobelXY(src_argb_a + off, kStride, dst_argb_c, kStride, width,
1795 invert * height);
1796 MaskCpuFlags(benchmark_cpu_info);
1797 for (int i = 0; i < benchmark_iterations; ++i) {
1798 ARGBSobelXY(src_argb_a + off, kStride, dst_argb_opt, kStride, width,
1799 invert * height);
1800 }
1801 int max_diff = 0;
1802 for (int i = 0; i < kStride * height; ++i) {
1803 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1804 static_cast<int>(dst_argb_opt[i]));
1805 if (abs_diff > max_diff) {
1806 max_diff = abs_diff;
1807 }
1808 }
1809 free_aligned_buffer_page_end(src_argb_a);
1810 free_aligned_buffer_page_end(dst_argb_c);
1811 free_aligned_buffer_page_end(dst_argb_opt);
1812 return max_diff;
1813 }
1814
// ARGBSobelXY with a width one less than the benchmark width, invert +1 and
// no source offset; both runs must match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Any) {
  const int max_diff =
      TestSobelXY(benchmark_width_ - 1, benchmark_height_,
                  benchmark_iterations_, disable_cpu_flags_,
                  benchmark_cpu_info_, /*invert=*/+1, /*off=*/0);
  EXPECT_EQ(0, max_diff);
}
1821
// ARGBSobelXY at the benchmark size with the source pointer offset by one
// byte; both runs must match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Unaligned) {
  const int max_diff =
      TestSobelXY(benchmark_width_, benchmark_height_, benchmark_iterations_,
                  disable_cpu_flags_, benchmark_cpu_info_, /*invert=*/+1,
                  /*off=*/1);
  EXPECT_EQ(0, max_diff);
}
1828
// ARGBSobelXY at the benchmark size with invert -1 (a negative height is
// handed to the library); both runs must match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Invert) {
  const int max_diff =
      TestSobelXY(benchmark_width_, benchmark_height_, benchmark_iterations_,
                  disable_cpu_flags_, benchmark_cpu_info_, /*invert=*/-1,
                  /*off=*/0);
  EXPECT_EQ(0, max_diff);
}
1835
// ARGBSobelXY at the benchmark size, invert +1, no source offset; both runs
// must match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Opt) {
  const int max_diff =
      TestSobelXY(benchmark_width_, benchmark_height_, benchmark_iterations_,
                  disable_cpu_flags_, benchmark_cpu_info_, /*invert=*/+1,
                  /*off=*/0);
  EXPECT_EQ(0, max_diff);
}
1842
TestBlur(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off,int radius)1843 static int TestBlur(int width,
1844 int height,
1845 int benchmark_iterations,
1846 int disable_cpu_flags,
1847 int benchmark_cpu_info,
1848 int invert,
1849 int off,
1850 int radius) {
1851 if (width < 1) {
1852 width = 1;
1853 }
1854 const int kBpp = 4;
1855 const int kStride = width * kBpp;
1856 align_buffer_page_end(src_argb_a, kStride * height + off);
1857 align_buffer_page_end(dst_cumsum, width * height * 16);
1858 align_buffer_page_end(dst_argb_c, kStride * height);
1859 align_buffer_page_end(dst_argb_opt, kStride * height);
1860 for (int i = 0; i < kStride * height; ++i) {
1861 src_argb_a[i + off] = (fastrand() & 0xff);
1862 }
1863 memset(dst_cumsum, 0, width * height * 16);
1864 memset(dst_argb_c, 0, kStride * height);
1865 memset(dst_argb_opt, 0, kStride * height);
1866
1867 MaskCpuFlags(disable_cpu_flags);
1868 ARGBBlur(src_argb_a + off, kStride, dst_argb_c, kStride,
1869 reinterpret_cast<int32_t*>(dst_cumsum), width * 4, width,
1870 invert * height, radius);
1871 MaskCpuFlags(benchmark_cpu_info);
1872 for (int i = 0; i < benchmark_iterations; ++i) {
1873 ARGBBlur(src_argb_a + off, kStride, dst_argb_opt, kStride,
1874 reinterpret_cast<int32_t*>(dst_cumsum), width * 4, width,
1875 invert * height, radius);
1876 }
1877 int max_diff = 0;
1878 for (int i = 0; i < kStride * height; ++i) {
1879 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1880 static_cast<int>(dst_argb_opt[i]));
1881 if (abs_diff > max_diff) {
1882 max_diff = abs_diff;
1883 }
1884 }
1885 free_aligned_buffer_page_end(src_argb_a);
1886 free_aligned_buffer_page_end(dst_cumsum);
1887 free_aligned_buffer_page_end(dst_argb_c);
1888 free_aligned_buffer_page_end(dst_argb_opt);
1889 return max_diff;
1890 }
1891
// Value passed as the `radius` argument of TestBlur by the ARGBBlur_* tests.
static const int kBlurSize = 55;
// ARGBBlur with radius kBlurSize and a width one less than the benchmark
// width; the two runs may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBBlur_Any) {
  const int max_diff = TestBlur(benchmark_width_ - 1, benchmark_height_,
                                benchmark_iterations_, disable_cpu_flags_,
                                benchmark_cpu_info_, /*invert=*/+1, /*off=*/0,
                                kBlurSize);
  EXPECT_LE(max_diff, 1);
}
1899
// ARGBBlur with radius kBlurSize and the source pointer offset by one byte;
// the two runs may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBBlur_Unaligned) {
  const int max_diff = TestBlur(benchmark_width_, benchmark_height_,
                                benchmark_iterations_, disable_cpu_flags_,
                                benchmark_cpu_info_, /*invert=*/+1, /*off=*/1,
                                kBlurSize);
  EXPECT_LE(max_diff, 1);
}
1906
// ARGBBlur with radius kBlurSize and invert -1 (negative height); the two
// runs may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBBlur_Invert) {
  const int max_diff = TestBlur(benchmark_width_, benchmark_height_,
                                benchmark_iterations_, disable_cpu_flags_,
                                benchmark_cpu_info_, /*invert=*/-1, /*off=*/0,
                                kBlurSize);
  EXPECT_LE(max_diff, 1);
}
1913
// ARGBBlur with radius kBlurSize at the benchmark size; the two runs may
// differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBBlur_Opt) {
  const int max_diff = TestBlur(benchmark_width_, benchmark_height_,
                                benchmark_iterations_, disable_cpu_flags_,
                                benchmark_cpu_info_, /*invert=*/+1, /*off=*/0,
                                kBlurSize);
  EXPECT_LE(max_diff, 1);
}
1920
// Value passed as the `radius` argument of TestBlur by the ARGBBlurSmall_*
// tests.
static const int kBlurSmallSize = 5;
// ARGBBlur with radius kBlurSmallSize and a width one less than the benchmark
// width; the two runs may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Any) {
  const int max_diff = TestBlur(benchmark_width_ - 1, benchmark_height_,
                                benchmark_iterations_, disable_cpu_flags_,
                                benchmark_cpu_info_, /*invert=*/+1, /*off=*/0,
                                kBlurSmallSize);
  EXPECT_LE(max_diff, 1);
}
1928
// ARGBBlur with radius kBlurSmallSize and the source pointer offset by one
// byte; the two runs may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Unaligned) {
  const int max_diff = TestBlur(benchmark_width_, benchmark_height_,
                                benchmark_iterations_, disable_cpu_flags_,
                                benchmark_cpu_info_, /*invert=*/+1, /*off=*/1,
                                kBlurSmallSize);
  EXPECT_LE(max_diff, 1);
}
1935
// ARGBBlur with radius kBlurSmallSize and invert -1 (negative height); the
// two runs may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Invert) {
  const int max_diff = TestBlur(benchmark_width_, benchmark_height_,
                                benchmark_iterations_, disable_cpu_flags_,
                                benchmark_cpu_info_, /*invert=*/-1, /*off=*/0,
                                kBlurSmallSize);
  EXPECT_LE(max_diff, 1);
}
1942
// ARGBBlur with radius kBlurSmallSize at the benchmark size; the two runs may
// differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Opt) {
  const int max_diff = TestBlur(benchmark_width_, benchmark_height_,
                                benchmark_iterations_, disable_cpu_flags_,
                                benchmark_cpu_info_, /*invert=*/+1, /*off=*/0,
                                kBlurSmallSize);
  EXPECT_LE(max_diff, 1);
}
1949
// Two-part check of ARGBPolynomial with the kWarmifyPolynomial coefficients:
//  1. five hand-picked pixels are converted and compared with fixed expected
//     values;
//  2. a 1280 pixel ramp is converted once after
//     MaskCpuFlags(disable_cpu_flags_) and benchmark_pixels_div1280_ times
//     after MaskCpuFlags(benchmark_cpu_info_), and both outputs must be equal.
TEST_F(LibYUVPlanarTest, TestARGBPolynomial) {
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]);
  SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  SIMD_ALIGNED(static const float kWarmifyPolynomial[16]) = {
      0.94230f,  -3.03300f,    -2.92500f,    0.f,  // C0
      0.584500f, 1.112000f,    1.535000f,    1.f,  // C1 x
      0.001313f, -0.002503f,   -0.004496f,   0.f,  // C2 x * x
      0.0f,      0.000006965f, 0.000008781f, 0.f,  // C3 x * x * x
  };

  // Hand-picked source pixels, four bytes each, in memory order.
  static const uint8_t kTestPixels[5][4] = {
      {255u, 0u, 0u, 128u},      // Test blue
      {0u, 255u, 0u, 0u},        // Test green
      {0u, 0u, 255u, 255u},      // Test red
      {255u, 255u, 255u, 255u},  // Test white
      {16u, 64u, 192u, 224u},    // Test color
  };
  memcpy(orig_pixels, kTestPixels, sizeof(kTestPixels));

  // Do 16 to test asm version.
  ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
                 &kWarmifyPolynomial[0], 16, 1);
  EXPECT_EQ(235u, dst_pixels_opt[0][0]);
  EXPECT_EQ(0u, dst_pixels_opt[0][1]);
  EXPECT_EQ(0u, dst_pixels_opt[0][2]);
  EXPECT_EQ(128u, dst_pixels_opt[0][3]);
  EXPECT_EQ(0u, dst_pixels_opt[1][0]);
  EXPECT_EQ(233u, dst_pixels_opt[1][1]);
  EXPECT_EQ(0u, dst_pixels_opt[1][2]);
  EXPECT_EQ(0u, dst_pixels_opt[1][3]);
  EXPECT_EQ(0u, dst_pixels_opt[2][0]);
  EXPECT_EQ(0u, dst_pixels_opt[2][1]);
  EXPECT_EQ(241u, dst_pixels_opt[2][2]);
  EXPECT_EQ(255u, dst_pixels_opt[2][3]);
  EXPECT_EQ(235u, dst_pixels_opt[3][0]);
  EXPECT_EQ(233u, dst_pixels_opt[3][1]);
  EXPECT_EQ(241u, dst_pixels_opt[3][2]);
  EXPECT_EQ(255u, dst_pixels_opt[3][3]);
  EXPECT_EQ(10u, dst_pixels_opt[4][0]);
  EXPECT_EQ(59u, dst_pixels_opt[4][1]);
  EXPECT_EQ(188u, dst_pixels_opt[4][2]);
  EXPECT_EQ(224u, dst_pixels_opt[4][3]);

  // Ramp input for the full-row comparison; values wrap when stored in a
  // byte.
  for (int p = 0; p < 1280; ++p) {
    uint8_t* const px = orig_pixels[p];
    px[0] = p;
    px[1] = p / 2;
    px[2] = p / 3;
    px[3] = p;
  }

  // Reference row.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0,
                 &kWarmifyPolynomial[0], 1280, 1);

  // Benchmarked row.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
                   &kWarmifyPolynomial[0], 1280, 1);
  }

  for (int i = 0; i < 1280; ++i) {
    EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]);
    EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]);
    EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]);
    EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]);
  }
}
2036
TestHalfFloatPlane(int benchmark_width,int benchmark_height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,float scale,int mask)2037 int TestHalfFloatPlane(int benchmark_width,
2038 int benchmark_height,
2039 int benchmark_iterations,
2040 int disable_cpu_flags,
2041 int benchmark_cpu_info,
2042 float scale,
2043 int mask) {
2044 int i, j;
2045 const int y_plane_size = benchmark_width * benchmark_height * 2;
2046
2047 align_buffer_page_end(orig_y, y_plane_size * 3);
2048 uint8_t* dst_opt = orig_y + y_plane_size;
2049 uint8_t* dst_c = orig_y + y_plane_size * 2;
2050
2051 MemRandomize(orig_y, y_plane_size);
2052 memset(dst_c, 0, y_plane_size);
2053 memset(dst_opt, 1, y_plane_size);
2054
2055 for (i = 0; i < y_plane_size / 2; ++i) {
2056 reinterpret_cast<uint16_t*>(orig_y)[i] &= mask;
2057 }
2058
2059 // Disable all optimizations.
2060 MaskCpuFlags(disable_cpu_flags);
2061 for (j = 0; j < benchmark_iterations; j++) {
2062 HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y), benchmark_width * 2,
2063 reinterpret_cast<uint16_t*>(dst_c), benchmark_width * 2,
2064 scale, benchmark_width, benchmark_height);
2065 }
2066
2067 // Enable optimizations.
2068 MaskCpuFlags(benchmark_cpu_info);
2069 for (j = 0; j < benchmark_iterations; j++) {
2070 HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y), benchmark_width * 2,
2071 reinterpret_cast<uint16_t*>(dst_opt), benchmark_width * 2,
2072 scale, benchmark_width, benchmark_height);
2073 }
2074
2075 int max_diff = 0;
2076 for (i = 0; i < y_plane_size / 2; ++i) {
2077 int abs_diff =
2078 abs(static_cast<int>(reinterpret_cast<uint16_t*>(dst_c)[i]) -
2079 static_cast<int>(reinterpret_cast<uint16_t*>(dst_opt)[i]));
2080 if (abs_diff > max_diff) {
2081 max_diff = abs_diff;
2082 }
2083 }
2084
2085 free_aligned_buffer_page_end(orig_y);
2086 return max_diff;
2087 }
2088
#if defined(__arm__)
// Sets bit 24 (0x1000000) of the ARM FPSCR register: the value is read with
// vmrs, the bit is ORed in, and the result is written back with vmsr. Per the
// ARM architecture reference this is the FZ (flush-to-zero) bit, which matches
// the function name. Only compiled when __arm__ is defined. The previous
// FPSCR value is neither saved nor restored.
static void EnableFlushDenormalToZero(void) {
  uint32_t cw;  // Receives the FPSCR value while it is being modified.
  __asm__ __volatile__(
      "vmrs %0, fpscr \n"
      "orr %0, %0, #0x1000000 \n"
      "vmsr fpscr, %0 \n"
      : "=r"(cw)::"memory");
}
#endif
2099
// 5 bit exponent with bias of 15 will underflow to a denormal if scale causes
// exponent to be less than 0. 15 - log2(65536) = -1. This shouldn't normally
// happen since scale is 1/(1<<bits) where bits is 9, 10 or 12.
2103
// HalfFloatPlane with scale 1/65536 on full 16-bit input (mask 65535); both
// conversions must match exactly.
// NOTE(review): on __arm__ builds flush-to-zero is switched on here and is
// not switched off again within this test.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_denormal) {
// 32 bit arm rounding on denormal case is off by 1 compared to C.
#if defined(__arm__)
  EnableFlushDenormalToZero();
#endif
  const int diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 65536.0f, 65535);
  EXPECT_EQ(0, diff);
}
2114
// HalfFloatPlane with scale 1.0 on full 16-bit input (mask 65535); the two
// conversions may differ by at most 1.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_One) {
  const int diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, 1.0f, 65535);
  EXPECT_LE(diff, 1);
}
2121
// HalfFloatPlane with scale 1/4096 on full 16-bit input (mask 65535); both
// conversions must match exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_Opt) {
  const int diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 4096.0f, 65535);
  EXPECT_EQ(0, diff);
}
2128
// HalfFloatPlane with scale 1/1024 on input masked to 10 bits (1023); both
// conversions must match exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_Opt) {
  const int diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 1024.0f, 1023);
  EXPECT_EQ(0, diff);
}
2135
// HalfFloatPlane with scale 1/512 on input masked to 9 bits (511); both
// conversions must match exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_9bit_Opt) {
  const int diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 512.0f, 511);
  EXPECT_EQ(0, diff);
}
2142
// HalfFloatPlane with scale 1/4096 on input masked to 12 bits (4095); both
// conversions must match exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Opt) {
  const int diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 4096.0f, 4095);
  EXPECT_EQ(0, diff);
}
2149
// HalfFloatPlane with scale 1/4095 (not a power of two) on input masked to
// 12 bits (4095); both conversions must match exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Offby1) {
  const int diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 4095.0f, 4095);
  EXPECT_EQ(0, diff);
}
2156
// HalfFloatPlane with scale 1.0 on input masked to 11 bits (2047); both
// conversions must match exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_One) {
  const int diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, 1.0f, 2047);
  EXPECT_EQ(0, diff);
}
2163
// HalfFloatPlane with scale 1.0 on input masked to 12 bits (4095); the two
// conversions may differ by at most 1.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_12bit_One) {
  const int diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, 1.0f, 4095);
  EXPECT_LE(diff, 1);
}
2170
TestByteToFloat(int benchmark_width,int benchmark_height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,float scale)2171 float TestByteToFloat(int benchmark_width,
2172 int benchmark_height,
2173 int benchmark_iterations,
2174 int disable_cpu_flags,
2175 int benchmark_cpu_info,
2176 float scale) {
2177 int i, j;
2178 const int y_plane_size = benchmark_width * benchmark_height;
2179
2180 align_buffer_page_end(orig_y, y_plane_size * (1 + 4 + 4));
2181 float* dst_opt = reinterpret_cast<float*>(orig_y + y_plane_size);
2182 float* dst_c = reinterpret_cast<float*>(orig_y + y_plane_size * 5);
2183
2184 MemRandomize(orig_y, y_plane_size);
2185 memset(dst_c, 0, y_plane_size * 4);
2186 memset(dst_opt, 1, y_plane_size * 4);
2187
2188 // Disable all optimizations.
2189 MaskCpuFlags(disable_cpu_flags);
2190 ByteToFloat(orig_y, dst_c, scale, y_plane_size);
2191
2192 // Enable optimizations.
2193 MaskCpuFlags(benchmark_cpu_info);
2194 for (j = 0; j < benchmark_iterations; j++) {
2195 ByteToFloat(orig_y, dst_opt, scale, y_plane_size);
2196 }
2197
2198 float max_diff = 0;
2199 for (i = 0; i < y_plane_size; ++i) {
2200 float abs_diff = fabs(dst_c[i] - dst_opt[i]);
2201 if (abs_diff > max_diff) {
2202 max_diff = abs_diff;
2203 }
2204 }
2205
2206 free_aligned_buffer_page_end(orig_y);
2207 return max_diff;
2208 }
2209
// ByteToFloat with scale 1.0 over benchmark_width_ * benchmark_height_ bytes;
// both conversions must produce identical floats.
TEST_F(LibYUVPlanarTest, TestByteToFloat) {
  const float diff = TestByteToFloat(benchmark_width_, benchmark_height_,
                                     benchmark_iterations_,
                                     disable_cpu_flags_, benchmark_cpu_info_,
                                     1.0f);
  EXPECT_EQ(0.f, diff);
}
2216
// Two-part check of ARGBLumaColorTable with a 32768 byte table whose entry i
// holds the low byte of 3 * i:
//  1. four hand-picked pixels are converted and compared with fixed expected
//     values;
//  2. a 1280 pixel ramp is converted once after
//     MaskCpuFlags(disable_cpu_flags_) and benchmark_pixels_div1280_ times
//     after MaskCpuFlags(benchmark_cpu_info_), and both outputs must be equal.
TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) {
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]);
  SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  align_buffer_page_end(lumacolortable, 32768);
  // The int value is truncated to a byte on assignment.
  for (int n = 0, v = 0; n < 32768; ++n, v += 3) {
    lumacolortable[n] = v;
  }

  // Hand-picked source pixels, four bytes each, in memory order.
  static const uint8_t kTestPixels[4][4] = {
      {255u, 0u, 0u, 128u},    // Test blue
      {0u, 255u, 0u, 0u},      // Test green
      {0u, 0u, 255u, 255u},    // Test red
      {16u, 64u, 192u, 224u},  // Test color
  };
  memcpy(orig_pixels, kTestPixels, sizeof(kTestPixels));

  // Do 16 to test asm version.
  ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
                     &lumacolortable[0], 16, 1);
  EXPECT_EQ(253u, dst_pixels_opt[0][0]);
  EXPECT_EQ(0u, dst_pixels_opt[0][1]);
  EXPECT_EQ(0u, dst_pixels_opt[0][2]);
  EXPECT_EQ(128u, dst_pixels_opt[0][3]);
  EXPECT_EQ(0u, dst_pixels_opt[1][0]);
  EXPECT_EQ(253u, dst_pixels_opt[1][1]);
  EXPECT_EQ(0u, dst_pixels_opt[1][2]);
  EXPECT_EQ(0u, dst_pixels_opt[1][3]);
  EXPECT_EQ(0u, dst_pixels_opt[2][0]);
  EXPECT_EQ(0u, dst_pixels_opt[2][1]);
  EXPECT_EQ(253u, dst_pixels_opt[2][2]);
  EXPECT_EQ(255u, dst_pixels_opt[2][3]);
  EXPECT_EQ(48u, dst_pixels_opt[3][0]);
  EXPECT_EQ(192u, dst_pixels_opt[3][1]);
  EXPECT_EQ(64u, dst_pixels_opt[3][2]);
  EXPECT_EQ(224u, dst_pixels_opt[3][3]);

  // Ramp input for the full-row comparison; values wrap when stored in a
  // byte.
  for (int p = 0; p < 1280; ++p) {
    uint8_t* const px = orig_pixels[p];
    px[0] = p;
    px[1] = p / 2;
    px[2] = p / 3;
    px[3] = p;
  }

  // Reference row.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0,
                     lumacolortable, 1280, 1);

  // Benchmarked row.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
                       lumacolortable, 1280, 1);
  }

  for (int i = 0; i < 1280; ++i) {
    EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]);
    EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]);
    EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]);
    EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]);
  }

  free_aligned_buffer_page_end(lumacolortable);
}
2294
// Runs ARGBCopyAlpha from a random source into two destinations that start
// with identical random content: once after MaskCpuFlags(disable_cpu_flags_)
// and benchmark_iterations_ times after MaskCpuFlags(benchmark_cpu_info_).
// Every destination byte must match afterwards.
TEST_F(LibYUVPlanarTest, TestARGBCopyAlpha) {
  const int kStride = benchmark_width_ * 4;
  const int kSize = benchmark_width_ * benchmark_height_ * 4;
  align_buffer_page_end(orig_pixels, kSize);
  align_buffer_page_end(dst_pixels_opt, kSize);
  align_buffer_page_end(dst_pixels_c, kSize);

  MemRandomize(orig_pixels, kSize);
  MemRandomize(dst_pixels_opt, kSize);
  // Both destinations start from the same bytes.
  memcpy(dst_pixels_c, dst_pixels_opt, kSize);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBCopyAlpha(orig_pixels, kStride, dst_pixels_c, kStride, benchmark_width_,
                benchmark_height_);

  // Benchmarked pass.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int run = 0; run < benchmark_iterations_; ++run) {
    ARGBCopyAlpha(orig_pixels, kStride, dst_pixels_opt, kStride,
                  benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < kSize; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(dst_pixels_c);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(orig_pixels);
}
2322
// Runs ARGBExtractAlpha from a random 4-byte-per-pixel source into two
// 1-byte-per-pixel destinations that start with identical random content:
// once after MaskCpuFlags(disable_cpu_flags_) and benchmark_iterations_ times
// after MaskCpuFlags(benchmark_cpu_info_). Every destination byte must match.
TEST_F(LibYUVPlanarTest, TestARGBExtractAlpha) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kSrcStride = benchmark_width_ * 4;
  const int kSrcSize = kPixels * 4;
  align_buffer_page_end(src_pixels, kSrcSize);
  align_buffer_page_end(dst_pixels_opt, kPixels);
  align_buffer_page_end(dst_pixels_c, kPixels);

  MemRandomize(src_pixels, kSrcSize);
  MemRandomize(dst_pixels_opt, kPixels);
  // Both destinations start from the same bytes.
  memcpy(dst_pixels_c, dst_pixels_opt, kPixels);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBExtractAlpha(src_pixels, kSrcStride, dst_pixels_c, benchmark_width_,
                   benchmark_width_, benchmark_height_);

  // Benchmarked pass.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int run = 0; run < benchmark_iterations_; ++run) {
    ARGBExtractAlpha(src_pixels, kSrcStride, dst_pixels_opt, benchmark_width_,
                     benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < kPixels; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(dst_pixels_c);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(src_pixels);
}
2350
// Runs ARGBCopyYToAlpha from a random 1-byte-per-pixel source into two
// 4-byte-per-pixel destinations that start with identical random content:
// once after MaskCpuFlags(disable_cpu_flags_) and benchmark_iterations_ times
// after MaskCpuFlags(benchmark_cpu_info_). Every destination byte must match.
TEST_F(LibYUVPlanarTest, TestARGBCopyYToAlpha) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kDstStride = benchmark_width_ * 4;
  const int kDstSize = kPixels * 4;
  align_buffer_page_end(orig_pixels, kPixels);
  align_buffer_page_end(dst_pixels_opt, kDstSize);
  align_buffer_page_end(dst_pixels_c, kDstSize);

  MemRandomize(orig_pixels, kPixels);
  MemRandomize(dst_pixels_opt, kDstSize);
  // Both destinations start from the same bytes.
  memcpy(dst_pixels_c, dst_pixels_opt, kDstSize);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBCopyYToAlpha(orig_pixels, benchmark_width_, dst_pixels_c, kDstStride,
                   benchmark_width_, benchmark_height_);

  // Benchmarked pass.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int run = 0; run < benchmark_iterations_; ++run) {
    ARGBCopyYToAlpha(orig_pixels, benchmark_width_, dst_pixels_opt, kDstStride,
                     benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < kPixels * 4; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(dst_pixels_c);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(orig_pixels);
}
2378
TestARGBRect(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off,int bpp)2379 static int TestARGBRect(int width,
2380 int height,
2381 int benchmark_iterations,
2382 int disable_cpu_flags,
2383 int benchmark_cpu_info,
2384 int invert,
2385 int off,
2386 int bpp) {
2387 if (width < 1) {
2388 width = 1;
2389 }
2390 const int kStride = width * bpp;
2391 const int kSize = kStride * height;
2392 const uint32_t v32 = fastrand() & (bpp == 4 ? 0xffffffff : 0xff);
2393
2394 align_buffer_page_end(dst_argb_c, kSize + off);
2395 align_buffer_page_end(dst_argb_opt, kSize + off);
2396
2397 MemRandomize(dst_argb_c + off, kSize);
2398 memcpy(dst_argb_opt + off, dst_argb_c + off, kSize);
2399
2400 MaskCpuFlags(disable_cpu_flags);
2401 if (bpp == 4) {
2402 ARGBRect(dst_argb_c + off, kStride, 0, 0, width, invert * height, v32);
2403 } else {
2404 SetPlane(dst_argb_c + off, kStride, width, invert * height, v32);
2405 }
2406
2407 MaskCpuFlags(benchmark_cpu_info);
2408 for (int i = 0; i < benchmark_iterations; ++i) {
2409 if (bpp == 4) {
2410 ARGBRect(dst_argb_opt + off, kStride, 0, 0, width, invert * height, v32);
2411 } else {
2412 SetPlane(dst_argb_opt + off, kStride, width, invert * height, v32);
2413 }
2414 }
2415 int max_diff = 0;
2416 for (int i = 0; i < kStride * height; ++i) {
2417 int abs_diff = abs(static_cast<int>(dst_argb_c[i + off]) -
2418 static_cast<int>(dst_argb_opt[i + off]));
2419 if (abs_diff > max_diff) {
2420 max_diff = abs_diff;
2421 }
2422 }
2423 free_aligned_buffer_page_end(dst_argb_c);
2424 free_aligned_buffer_page_end(dst_argb_opt);
2425 return max_diff;
2426 }
2427
// ARGBRect (bpp 4) with a width one less than the benchmark width; both
// fills must match exactly.
TEST_F(LibYUVPlanarTest, ARGBRect_Any) {
  const int max_diff =
      TestARGBRect(benchmark_width_ - 1, benchmark_height_,
                   benchmark_iterations_, disable_cpu_flags_,
                   benchmark_cpu_info_, /*invert=*/+1, /*off=*/0, /*bpp=*/4);
  EXPECT_EQ(0, max_diff);
}
2434
// ARGBRect (bpp 4) with the destination pointers offset by one byte; both
// fills must match exactly.
TEST_F(LibYUVPlanarTest, ARGBRect_Unaligned) {
  const int max_diff =
      TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, /*invert=*/+1,
                   /*off=*/1, /*bpp=*/4);
  EXPECT_EQ(0, max_diff);
}
2441
// ARGBRect (bpp 4) with invert -1 (negative height); both fills must match
// exactly.
TEST_F(LibYUVPlanarTest, ARGBRect_Invert) {
  const int max_diff =
      TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, /*invert=*/-1,
                   /*off=*/0, /*bpp=*/4);
  EXPECT_EQ(0, max_diff);
}
2448
// ARGBRect (bpp 4) at the benchmark size; both fills must match exactly.
TEST_F(LibYUVPlanarTest, ARGBRect_Opt) {
  const int max_diff =
      TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, /*invert=*/+1,
                   /*off=*/0, /*bpp=*/4);
  EXPECT_EQ(0, max_diff);
}
2455
// SetPlane (TestARGBRect with bpp 1) with a width one less than the benchmark
// width; both fills must match exactly.
TEST_F(LibYUVPlanarTest, SetPlane_Any) {
  const int max_diff =
      TestARGBRect(benchmark_width_ - 1, benchmark_height_,
                   benchmark_iterations_, disable_cpu_flags_,
                   benchmark_cpu_info_, /*invert=*/+1, /*off=*/0, /*bpp=*/1);
  EXPECT_EQ(0, max_diff);
}
2462
// SetPlane (TestARGBRect with bpp 1) with the destination pointers offset by
// one byte; both fills must match exactly.
TEST_F(LibYUVPlanarTest, SetPlane_Unaligned) {
  const int max_diff =
      TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, /*invert=*/+1,
                   /*off=*/1, /*bpp=*/1);
  EXPECT_EQ(0, max_diff);
}
2469
// SetPlane (TestARGBRect with bpp 1) with invert -1 (negative height); both
// fills must match exactly.
TEST_F(LibYUVPlanarTest, SetPlane_Invert) {
  const int max_diff =
      TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, /*invert=*/-1,
                   /*off=*/0, /*bpp=*/1);
  EXPECT_EQ(0, max_diff);
}
2476
// SetPlane (TestARGBRect with bpp 1) at the benchmark size; both fills must
// match exactly.
TEST_F(LibYUVPlanarTest, SetPlane_Opt) {
  const int max_diff =
      TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, /*invert=*/+1,
                   /*off=*/0, /*bpp=*/1);
  EXPECT_EQ(0, max_diff);
}
2483
// Benchmarks MergeUVPlane. A random interleaved source is split with
// SplitUVPlane and merged back with MergeUVPlane after
// MaskCpuFlags(disable_cpu_flags_); then, after
// MaskCpuFlags(benchmark_cpu_info_), it is split once more and merged
// benchmark_iterations_ times. Both merged outputs must be byte-identical.
TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kUVSize = kPixels * 2;
  const int kUVStride = benchmark_width_ * 2;
  align_buffer_page_end(src_pixels, kUVSize);
  align_buffer_page_end(tmp_pixels_u, kPixels);
  align_buffer_page_end(tmp_pixels_v, kPixels);
  align_buffer_page_end(dst_pixels_opt, kUVSize);
  align_buffer_page_end(dst_pixels_c, kUVSize);

  MemRandomize(src_pixels, kUVSize);
  MemRandomize(tmp_pixels_u, kPixels);
  MemRandomize(tmp_pixels_v, kPixels);
  MemRandomize(dst_pixels_opt, kUVSize);
  MemRandomize(dst_pixels_c, kUVSize);

  // Reference split + merge.
  MaskCpuFlags(disable_cpu_flags_);
  SplitUVPlane(src_pixels, kUVStride, tmp_pixels_u, benchmark_width_,
               tmp_pixels_v, benchmark_width_, benchmark_width_,
               benchmark_height_);
  MergeUVPlane(tmp_pixels_u, benchmark_width_, tmp_pixels_v, benchmark_width_,
               dst_pixels_c, kUVStride, benchmark_width_, benchmark_height_);

  // Split again with the benchmark flags, then time only the merge.
  MaskCpuFlags(benchmark_cpu_info_);
  SplitUVPlane(src_pixels, kUVStride, tmp_pixels_u, benchmark_width_,
               tmp_pixels_v, benchmark_width_, benchmark_width_,
               benchmark_height_);
  for (int run = 0; run < benchmark_iterations_; ++run) {
    MergeUVPlane(tmp_pixels_u, benchmark_width_, tmp_pixels_v,
                 benchmark_width_, dst_pixels_opt, kUVStride, benchmark_width_,
                 benchmark_height_);
  }

  for (int i = 0; i < kPixels * 2; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(tmp_pixels_u);
  free_aligned_buffer_page_end(tmp_pixels_v);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
2527
// Benchmarks SplitUVPlane. A random interleaved source is split with
// SplitUVPlane and merged back with MergeUVPlane after
// MaskCpuFlags(disable_cpu_flags_); then, after
// MaskCpuFlags(benchmark_cpu_info_), it is split benchmark_iterations_ times
// and merged once. Both merged outputs must be byte-identical.
TEST_F(LibYUVPlanarTest, SplitUVPlane_Opt) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kUVSize = kPixels * 2;
  const int kUVStride = benchmark_width_ * 2;
  align_buffer_page_end(src_pixels, kUVSize);
  align_buffer_page_end(tmp_pixels_u, kPixels);
  align_buffer_page_end(tmp_pixels_v, kPixels);
  align_buffer_page_end(dst_pixels_opt, kUVSize);
  align_buffer_page_end(dst_pixels_c, kUVSize);

  MemRandomize(src_pixels, kUVSize);
  MemRandomize(tmp_pixels_u, kPixels);
  MemRandomize(tmp_pixels_v, kPixels);
  MemRandomize(dst_pixels_opt, kUVSize);
  MemRandomize(dst_pixels_c, kUVSize);

  // Reference split + merge.
  MaskCpuFlags(disable_cpu_flags_);
  SplitUVPlane(src_pixels, kUVStride, tmp_pixels_u, benchmark_width_,
               tmp_pixels_v, benchmark_width_, benchmark_width_,
               benchmark_height_);
  MergeUVPlane(tmp_pixels_u, benchmark_width_, tmp_pixels_v, benchmark_width_,
               dst_pixels_c, kUVStride, benchmark_width_, benchmark_height_);

  // Time only the split with the benchmark flags, then merge once.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int run = 0; run < benchmark_iterations_; ++run) {
    SplitUVPlane(src_pixels, kUVStride, tmp_pixels_u, benchmark_width_,
                 tmp_pixels_v, benchmark_width_, benchmark_width_,
                 benchmark_height_);
  }
  MergeUVPlane(tmp_pixels_u, benchmark_width_, tmp_pixels_v, benchmark_width_,
               dst_pixels_opt, kUVStride, benchmark_width_, benchmark_height_);

  for (int i = 0; i < kPixels * 2; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(tmp_pixels_u);
  free_aligned_buffer_page_end(tmp_pixels_v);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
2570
// Times MergeRGBPlane under benchmark_cpu_info_. The R, G and B planes come
// from SplitRGBPlane; the merged output must equal the split + merge result
// produced with the CPU features masked by disable_cpu_flags_.
TEST_F(LibYUVPlanarTest, MergeRGBPlane_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kPixels = kWidth * kHeight;
  const int kRGBStride = kWidth * 3;  // Three bytes per packed pixel.
  const int kRGBBytes = kPixels * 3;

  align_buffer_page_end(src_rgb, kRGBBytes);
  align_buffer_page_end(plane_r, kPixels);
  align_buffer_page_end(plane_g, kPixels);
  align_buffer_page_end(plane_b, kPixels);
  align_buffer_page_end(dst_pixels_opt, kRGBBytes);
  align_buffer_page_end(dst_pixels_c, kRGBBytes);

  // Randomize every buffer so that untouched output cannot match by luck.
  MemRandomize(src_rgb, kRGBBytes);
  MemRandomize(plane_r, kPixels);
  MemRandomize(plane_g, kPixels);
  MemRandomize(plane_b, kPixels);
  MemRandomize(dst_pixels_opt, kRGBBytes);
  MemRandomize(dst_pixels_c, kRGBBytes);

  // Reference round trip with the masked CPU flags.
  MaskCpuFlags(disable_cpu_flags_);
  SplitRGBPlane(src_rgb, kRGBStride, plane_r, kWidth, plane_g, kWidth, plane_b,
                kWidth, kWidth, kHeight);
  MergeRGBPlane(plane_r, kWidth, plane_g, kWidth, plane_b, kWidth,
                dst_pixels_c, kRGBStride, kWidth, kHeight);
  MaskCpuFlags(benchmark_cpu_info_);

  // Regenerate the planes with the benchmark CPU flags before timing.
  SplitRGBPlane(src_rgb, kRGBStride, plane_r, kWidth, plane_g, kWidth, plane_b,
                kWidth, kWidth, kHeight);

  // Timed section: only the merge is repeated.
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    MergeRGBPlane(plane_r, kWidth, plane_g, kWidth, plane_b, kWidth,
                  dst_pixels_opt, kRGBStride, kWidth, kHeight);
  }

  for (int i = 0; i < kRGBBytes; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(src_rgb);
  free_aligned_buffer_page_end(plane_r);
  free_aligned_buffer_page_end(plane_g);
  free_aligned_buffer_page_end(plane_b);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
2618
// Times SplitRGBPlane under benchmark_cpu_info_ and validates it through a
// round trip: the split planes are merged back and the result must equal the
// same split + merge performed with the CPU features masked by
// disable_cpu_flags_.
TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kPixels = kWidth * kHeight;
  const int kRGBStride = kWidth * 3;  // Three bytes per packed pixel.
  const int kRGBBytes = kPixels * 3;

  align_buffer_page_end(src_rgb, kRGBBytes);
  align_buffer_page_end(plane_r, kPixels);
  align_buffer_page_end(plane_g, kPixels);
  align_buffer_page_end(plane_b, kPixels);
  align_buffer_page_end(dst_pixels_opt, kRGBBytes);
  align_buffer_page_end(dst_pixels_c, kRGBBytes);

  // Randomize every buffer so that untouched output cannot match by luck.
  MemRandomize(src_rgb, kRGBBytes);
  MemRandomize(plane_r, kPixels);
  MemRandomize(plane_g, kPixels);
  MemRandomize(plane_b, kPixels);
  MemRandomize(dst_pixels_opt, kRGBBytes);
  MemRandomize(dst_pixels_c, kRGBBytes);

  // Reference round trip with the masked CPU flags.
  MaskCpuFlags(disable_cpu_flags_);
  SplitRGBPlane(src_rgb, kRGBStride, plane_r, kWidth, plane_g, kWidth, plane_b,
                kWidth, kWidth, kHeight);
  MergeRGBPlane(plane_r, kWidth, plane_g, kWidth, plane_b, kWidth,
                dst_pixels_c, kRGBStride, kWidth, kHeight);
  MaskCpuFlags(benchmark_cpu_info_);

  // Timed section: only the split is repeated.
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    SplitRGBPlane(src_rgb, kRGBStride, plane_r, kWidth, plane_g, kWidth,
                  plane_b, kWidth, kWidth, kHeight);
  }
  // Merge once afterwards so the split output can be compared byte for byte.
  MergeRGBPlane(plane_r, kWidth, plane_g, kWidth, plane_b, kWidth,
                dst_pixels_opt, kRGBStride, kWidth, kHeight);

  for (int i = 0; i < kRGBBytes; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(src_rgb);
  free_aligned_buffer_page_end(plane_r);
  free_aligned_buffer_page_end(plane_g);
  free_aligned_buffer_page_end(plane_b);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
2665
2666 // TODO(fbarchard): improve test for platforms and cpu detect
2667 #ifdef HAS_MERGEUVROW_16_AVX2
// Checks MergeUVRow_16_AVX2 against MergeUVRow_16_C on random 16-bit samples.
// When the CPU does not report AVX2 the timed loop runs the C row again.
TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kPlaneBytes = kPixels * 2;       // One uint16_t per sample.
  const int kMergedBytes = kPixels * 2 * 2;  // Two uint16_t per pixel.
  const int kScale = 64;                     // Fourth argument of the row call.

  align_buffer_page_end(src_bytes_u, kPlaneBytes);
  align_buffer_page_end(src_bytes_v, kPlaneBytes);
  align_buffer_page_end(dst_pixels_uv_opt, kMergedBytes);
  align_buffer_page_end(dst_pixels_uv_c, kMergedBytes);

  MemRandomize(src_bytes_u, kPlaneBytes);
  MemRandomize(src_bytes_v, kPlaneBytes);
  // Different fill values make an unwritten destination show up as a diff.
  memset(dst_pixels_uv_opt, 0, kMergedBytes);
  memset(dst_pixels_uv_c, 1, kMergedBytes);

  // 16-bit views of the byte buffers.
  const uint16_t* const src_u = reinterpret_cast<const uint16_t*>(src_bytes_u);
  const uint16_t* const src_v = reinterpret_cast<const uint16_t*>(src_bytes_v);
  uint16_t* const merged_c = reinterpret_cast<uint16_t*>(dst_pixels_uv_c);
  uint16_t* const merged_opt = reinterpret_cast<uint16_t*>(dst_pixels_uv_opt);

  MergeUVRow_16_C(src_u, src_v, merged_c, kScale, kPixels);

  const bool use_avx2 = TestCpuFlag(kCpuHasAVX2) != 0;
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    if (!use_avx2) {
      MergeUVRow_16_C(src_u, src_v, merged_opt, kScale, kPixels);
      continue;
    }
    MergeUVRow_16_AVX2(src_u, src_v, merged_opt, kScale, kPixels);
  }

  for (int i = 0; i < kMergedBytes; ++i) {
    EXPECT_EQ(dst_pixels_uv_opt[i], dst_pixels_uv_c[i]);
  }

  free_aligned_buffer_page_end(src_bytes_u);
  free_aligned_buffer_page_end(src_bytes_v);
  free_aligned_buffer_page_end(dst_pixels_uv_opt);
  free_aligned_buffer_page_end(dst_pixels_uv_c);
}
2708 #endif
2709
2710 // TODO(fbarchard): Improve test for more platforms.
2711 #ifdef HAS_MULTIPLYROW_16_AVX2
// Checks MultiplyRow_16_AVX2 against MultiplyRow_16_C on random 16-bit
// samples. When the CPU does not report AVX2 the timed loop runs the C row
// again.
TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kBytes = kPixels * 2;  // One uint16_t per sample.
  const int kScale = 64;           // Third argument of the row call.

  align_buffer_page_end(src_bytes_y, kBytes);
  align_buffer_page_end(dst_pixels_y_opt, kBytes);
  align_buffer_page_end(dst_pixels_y_c, kBytes);

  MemRandomize(src_bytes_y, kBytes);
  // Different fill values make an unwritten destination show up as a diff.
  memset(dst_pixels_y_opt, 0, kBytes);
  memset(dst_pixels_y_c, 1, kBytes);

  // 16-bit views of the byte buffers.
  const uint16_t* const src_y = reinterpret_cast<const uint16_t*>(src_bytes_y);
  uint16_t* const out_c = reinterpret_cast<uint16_t*>(dst_pixels_y_c);
  uint16_t* const out_opt = reinterpret_cast<uint16_t*>(dst_pixels_y_opt);

  MultiplyRow_16_C(src_y, out_c, kScale, kPixels);

  const bool use_avx2 = TestCpuFlag(kCpuHasAVX2) != 0;
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    if (!use_avx2) {
      MultiplyRow_16_C(src_y, out_opt, kScale, kPixels);
      continue;
    }
    MultiplyRow_16_AVX2(src_y, out_opt, kScale, kPixels);
  }

  for (int i = 0; i < kBytes; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(src_bytes_y);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
2746 #endif // HAS_MULTIPLYROW_16_AVX2
2747
// Runs Convert16To8Plane once with the CPU features masked by
// disable_cpu_flags_ and benchmark_iterations_ times under
// benchmark_cpu_info_, then requires both 8-bit outputs to be identical.
TEST_F(LibYUVPlanarTest, Convert16To8Plane) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kPixels = kWidth * kHeight;
  const int kSrcBytes = kPixels * 2;  // One uint16_t per source sample.
  const int kScale = 16384;           // Scale argument passed to the call.

  align_buffer_page_end(src_bytes_y, kSrcBytes);
  align_buffer_page_end(dst_pixels_y_opt, kPixels);
  align_buffer_page_end(dst_pixels_y_c, kPixels);

  MemRandomize(src_bytes_y, kSrcBytes);
  // Different fill values make an unwritten destination show up as a diff.
  memset(dst_pixels_y_opt, 0, kPixels);
  memset(dst_pixels_y_c, 1, kPixels);

  const uint16_t* const src_y = reinterpret_cast<const uint16_t*>(src_bytes_y);

  // Reference pass with the masked CPU flags.
  MaskCpuFlags(disable_cpu_flags_);
  Convert16To8Plane(src_y, kWidth, dst_pixels_y_c, kWidth, kScale, kWidth,
                    kHeight);
  MaskCpuFlags(benchmark_cpu_info_);

  // Timed section.
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    Convert16To8Plane(src_y, kWidth, dst_pixels_y_opt, kWidth, kScale, kWidth,
                      kHeight);
  }

  for (int i = 0; i < kPixels; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(src_bytes_y);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
2778
2779 // TODO(fbarchard): Improve test for more platforms.
2780 #ifdef HAS_CONVERT16TO8ROW_AVX2
// Checks the SIMD Convert16To8Row variants against Convert16To8Row_C on
// random samples masked to 10 bits. AVX2 is preferred, then SSSE3; without
// either the timed loop runs the C row again.
TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) {
  // AVX2 does multiple of 32, so round count up.
  const int kPixels = (benchmark_width_ * benchmark_height_ + 31) & ~31;
  const int kSrcBytes = kPixels * 2;  // One uint16_t per source sample.
  const int kScale = 16384;           // Scale argument passed to the row call.

  align_buffer_page_end(src_bytes_y, kSrcBytes);
  align_buffer_page_end(dst_pixels_y_opt, kPixels);
  align_buffer_page_end(dst_pixels_y_c, kPixels);

  uint16_t* const src_y = reinterpret_cast<uint16_t*>(src_bytes_y);

  MemRandomize(src_bytes_y, kSrcBytes);
  // Keep only the low 10 bits of each source sample.
  for (int n = 0; n < kPixels; ++n) {
    src_y[n] &= 1023;
  }

  // Different fill values make an unwritten destination show up as a diff.
  memset(dst_pixels_y_opt, 0, kPixels);
  memset(dst_pixels_y_c, 1, kPixels);

  Convert16To8Row_C(src_y, dst_pixels_y_c, kScale, kPixels);

  const bool use_avx2 = TestCpuFlag(kCpuHasAVX2) != 0;
  const bool use_ssse3 = TestCpuFlag(kCpuHasSSSE3) != 0;
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    if (use_avx2) {
      Convert16To8Row_AVX2(src_y, dst_pixels_y_opt, kScale, kPixels);
      continue;
    }
    if (use_ssse3) {
      Convert16To8Row_SSSE3(src_y, dst_pixels_y_opt, kScale, kPixels);
      continue;
    }
    Convert16To8Row_C(src_y, dst_pixels_y_opt, kScale, kPixels);
  }

  for (int i = 0; i < kPixels; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(src_bytes_y);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
2823 #endif // HAS_CONVERT16TO8ROW_AVX2
2824
// Runs Convert8To16Plane once with the CPU features masked by
// disable_cpu_flags_ and benchmark_iterations_ times under
// benchmark_cpu_info_, then requires both 16-bit outputs to match byte for
// byte.
TEST_F(LibYUVPlanarTest, Convert8To16Plane) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kPixels = kWidth * kHeight;
  const int kDstBytes = kPixels * 2;  // One uint16_t per output sample.
  const int kScale = 1024;            // Scale argument passed to the call.

  align_buffer_page_end(src_bytes_y, kPixels);
  align_buffer_page_end(dst_pixels_y_opt, kDstBytes);
  align_buffer_page_end(dst_pixels_y_c, kDstBytes);

  MemRandomize(src_bytes_y, kPixels);
  // Different fill values make an unwritten destination show up as a diff.
  memset(dst_pixels_y_opt, 0, kDstBytes);
  memset(dst_pixels_y_c, 1, kDstBytes);

  uint16_t* const out_c = reinterpret_cast<uint16_t*>(dst_pixels_y_c);
  uint16_t* const out_opt = reinterpret_cast<uint16_t*>(dst_pixels_y_opt);

  // Reference pass with the masked CPU flags.
  MaskCpuFlags(disable_cpu_flags_);
  Convert8To16Plane(src_bytes_y, kWidth, out_c, kWidth, kScale, kWidth,
                    kHeight);
  MaskCpuFlags(benchmark_cpu_info_);

  // Timed section.
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    Convert8To16Plane(src_bytes_y, kWidth, out_opt, kWidth, kScale, kWidth,
                      kHeight);
  }

  for (int i = 0; i < kDstBytes; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(src_bytes_y);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
2857
2858 // TODO(fbarchard): Improve test for more platforms.
2859 #ifdef HAS_CONVERT8TO16ROW_AVX2
// Checks the SIMD Convert8To16Row variants against Convert8To16Row_C on
// random bytes. AVX2 is preferred, then SSE2; without either the timed loop
// runs the C row again.
TEST_F(LibYUVPlanarTest, Convert8To16Row_Opt) {
  // Pixel count rounded up to a multiple of 32.
  const int kPixels = (benchmark_width_ * benchmark_height_ + 31) & ~31;
  const int kDstBytes = kPixels * 2;  // One uint16_t per output sample.
  const int kScale = 1024;            // Scale argument passed to the row call.

  align_buffer_page_end(src_bytes_y, kPixels);
  align_buffer_page_end(dst_pixels_y_opt, kDstBytes);
  align_buffer_page_end(dst_pixels_y_c, kDstBytes);

  MemRandomize(src_bytes_y, kPixels);
  // Different fill values make an unwritten destination show up as a diff.
  memset(dst_pixels_y_opt, 0, kDstBytes);
  memset(dst_pixels_y_c, 1, kDstBytes);

  uint16_t* const out_c = reinterpret_cast<uint16_t*>(dst_pixels_y_c);
  uint16_t* const out_opt = reinterpret_cast<uint16_t*>(dst_pixels_y_opt);

  Convert8To16Row_C(src_bytes_y, out_c, kScale, kPixels);

  const bool use_avx2 = TestCpuFlag(kCpuHasAVX2) != 0;
  const bool use_sse2 = TestCpuFlag(kCpuHasSSE2) != 0;
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    if (use_avx2) {
      Convert8To16Row_AVX2(src_bytes_y, out_opt, kScale, kPixels);
      continue;
    }
    if (use_sse2) {
      Convert8To16Row_SSE2(src_bytes_y, out_opt, kScale, kPixels);
      continue;
    }
    Convert8To16Row_C(src_bytes_y, out_opt, kScale, kPixels);
  }

  for (int i = 0; i < kDstBytes; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(src_bytes_y);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
2899 #endif // HAS_CONVERT8TO16ROW_AVX2
2900
TestScaleMaxSamples(int benchmark_width,int benchmark_height,int benchmark_iterations,float scale,bool opt)2901 float TestScaleMaxSamples(int benchmark_width,
2902 int benchmark_height,
2903 int benchmark_iterations,
2904 float scale,
2905 bool opt) {
2906 int i, j;
2907 float max_c, max_opt = 0.f;
2908 // NEON does multiple of 8, so round count up
2909 const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
2910 align_buffer_page_end(orig_y, kPixels * 4 * 3 + 48);
2911 uint8_t* dst_c = orig_y + kPixels * 4 + 16;
2912 uint8_t* dst_opt = orig_y + kPixels * 4 * 2 + 32;
2913
2914 // Randomize works but may contain some denormals affecting performance.
2915 // MemRandomize(orig_y, kPixels * 4);
2916 // large values are problematic. audio is really -1 to 1.
2917 for (i = 0; i < kPixels; ++i) {
2918 (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
2919 }
2920 memset(dst_c, 0, kPixels * 4);
2921 memset(dst_opt, 1, kPixels * 4);
2922
2923 max_c = ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y),
2924 reinterpret_cast<float*>(dst_c), scale, kPixels);
2925
2926 for (j = 0; j < benchmark_iterations; j++) {
2927 if (opt) {
2928 #ifdef HAS_SCALESUMSAMPLES_NEON
2929 max_opt = ScaleMaxSamples_NEON(reinterpret_cast<float*>(orig_y),
2930 reinterpret_cast<float*>(dst_opt), scale,
2931 kPixels);
2932 #else
2933 max_opt =
2934 ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y),
2935 reinterpret_cast<float*>(dst_opt), scale, kPixels);
2936 #endif
2937 } else {
2938 max_opt =
2939 ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y),
2940 reinterpret_cast<float*>(dst_opt), scale, kPixels);
2941 }
2942 }
2943
2944 float max_diff = FAbs(max_opt - max_c);
2945 for (i = 0; i < kPixels; ++i) {
2946 float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
2947 (reinterpret_cast<float*>(dst_opt)[i]));
2948 if (abs_diff > max_diff) {
2949 max_diff = abs_diff;
2950 }
2951 }
2952
2953 free_aligned_buffer_page_end(orig_y);
2954 return max_diff;
2955 }
2956
// ScaleMaxSamples with opt = false and a scale of 1.2 must show no difference.
TEST_F(LibYUVPlanarTest, TestScaleMaxSamples_C) {
  const float kScale = 1.2f;
  const bool kUseOpt = false;
  const float diff =
      TestScaleMaxSamples(benchmark_width_, benchmark_height_,
                          benchmark_iterations_, kScale, kUseOpt);
  EXPECT_EQ(0, diff);
}
2962
// ScaleMaxSamples with opt = true and a scale of 1.2 must show no difference.
TEST_F(LibYUVPlanarTest, TestScaleMaxSamples_Opt) {
  const float kScale = 1.2f;
  const bool kUseOpt = true;
  const float diff =
      TestScaleMaxSamples(benchmark_width_, benchmark_height_,
                          benchmark_iterations_, kScale, kUseOpt);
  EXPECT_EQ(0, diff);
}
2968
TestScaleSumSamples(int benchmark_width,int benchmark_height,int benchmark_iterations,float scale,bool opt)2969 float TestScaleSumSamples(int benchmark_width,
2970 int benchmark_height,
2971 int benchmark_iterations,
2972 float scale,
2973 bool opt) {
2974 int i, j;
2975 float sum_c, sum_opt = 0.f;
2976 // NEON does multiple of 8, so round count up
2977 const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
2978 align_buffer_page_end(orig_y, kPixels * 4 * 3);
2979 uint8_t* dst_c = orig_y + kPixels * 4;
2980 uint8_t* dst_opt = orig_y + kPixels * 4 * 2;
2981
2982 // Randomize works but may contain some denormals affecting performance.
2983 // MemRandomize(orig_y, kPixels * 4);
2984 // large values are problematic. audio is really -1 to 1.
2985 for (i = 0; i < kPixels; ++i) {
2986 (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
2987 }
2988 memset(dst_c, 0, kPixels * 4);
2989 memset(dst_opt, 1, kPixels * 4);
2990
2991 sum_c = ScaleSumSamples_C(reinterpret_cast<float*>(orig_y),
2992 reinterpret_cast<float*>(dst_c), scale, kPixels);
2993
2994 for (j = 0; j < benchmark_iterations; j++) {
2995 if (opt) {
2996 #ifdef HAS_SCALESUMSAMPLES_NEON
2997 sum_opt = ScaleSumSamples_NEON(reinterpret_cast<float*>(orig_y),
2998 reinterpret_cast<float*>(dst_opt), scale,
2999 kPixels);
3000 #else
3001 sum_opt =
3002 ScaleSumSamples_C(reinterpret_cast<float*>(orig_y),
3003 reinterpret_cast<float*>(dst_opt), scale, kPixels);
3004 #endif
3005 } else {
3006 sum_opt =
3007 ScaleSumSamples_C(reinterpret_cast<float*>(orig_y),
3008 reinterpret_cast<float*>(dst_opt), scale, kPixels);
3009 }
3010 }
3011
3012 float mse_opt = sum_opt / kPixels * 4;
3013 float mse_c = sum_c / kPixels * 4;
3014 float mse_error = FAbs(mse_opt - mse_c) / mse_c;
3015
3016 // If the sum of a float is more than 4 million, small adds are round down on
3017 // float and produce different results with vectorized sum vs scalar sum.
3018 // Ignore the difference if the sum is large.
3019 float max_diff = 0.f;
3020 if (mse_error > 0.0001 && sum_c < 4000000) { // allow .01% difference of mse
3021 max_diff = mse_error;
3022 }
3023
3024 for (i = 0; i < kPixels; ++i) {
3025 float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
3026 (reinterpret_cast<float*>(dst_opt)[i]));
3027 if (abs_diff > max_diff) {
3028 max_diff = abs_diff;
3029 }
3030 }
3031
3032 free_aligned_buffer_page_end(orig_y);
3033 return max_diff;
3034 }
3035
// ScaleSumSamples with opt = false and a scale of 1.2 must show no difference.
TEST_F(LibYUVPlanarTest, TestScaleSumSamples_C) {
  const float kScale = 1.2f;
  const bool kUseOpt = false;
  const float diff =
      TestScaleSumSamples(benchmark_width_, benchmark_height_,
                          benchmark_iterations_, kScale, kUseOpt);
  EXPECT_EQ(0, diff);
}
3041
// ScaleSumSamples with opt = true and a scale of 1.2 must show no difference.
TEST_F(LibYUVPlanarTest, TestScaleSumSamples_Opt) {
  const float kScale = 1.2f;
  const bool kUseOpt = true;
  const float diff =
      TestScaleSumSamples(benchmark_width_, benchmark_height_,
                          benchmark_iterations_, kScale, kUseOpt);
  EXPECT_EQ(0, diff);
}
3047
TestScaleSamples(int benchmark_width,int benchmark_height,int benchmark_iterations,float scale,bool opt)3048 float TestScaleSamples(int benchmark_width,
3049 int benchmark_height,
3050 int benchmark_iterations,
3051 float scale,
3052 bool opt) {
3053 int i, j;
3054 // NEON does multiple of 8, so round count up
3055 const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
3056 align_buffer_page_end(orig_y, kPixels * 4 * 3);
3057 uint8_t* dst_c = orig_y + kPixels * 4;
3058 uint8_t* dst_opt = orig_y + kPixels * 4 * 2;
3059
3060 // Randomize works but may contain some denormals affecting performance.
3061 // MemRandomize(orig_y, kPixels * 4);
3062 // large values are problematic. audio is really -1 to 1.
3063 for (i = 0; i < kPixels; ++i) {
3064 (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
3065 }
3066 memset(dst_c, 0, kPixels * 4);
3067 memset(dst_opt, 1, kPixels * 4);
3068
3069 ScaleSamples_C(reinterpret_cast<float*>(orig_y),
3070 reinterpret_cast<float*>(dst_c), scale, kPixels);
3071
3072 for (j = 0; j < benchmark_iterations; j++) {
3073 if (opt) {
3074 #ifdef HAS_SCALESUMSAMPLES_NEON
3075 ScaleSamples_NEON(reinterpret_cast<float*>(orig_y),
3076 reinterpret_cast<float*>(dst_opt), scale, kPixels);
3077 #else
3078 ScaleSamples_C(reinterpret_cast<float*>(orig_y),
3079 reinterpret_cast<float*>(dst_opt), scale, kPixels);
3080 #endif
3081 } else {
3082 ScaleSamples_C(reinterpret_cast<float*>(orig_y),
3083 reinterpret_cast<float*>(dst_opt), scale, kPixels);
3084 }
3085 }
3086
3087 float max_diff = 0.f;
3088 for (i = 0; i < kPixels; ++i) {
3089 float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
3090 (reinterpret_cast<float*>(dst_opt)[i]));
3091 if (abs_diff > max_diff) {
3092 max_diff = abs_diff;
3093 }
3094 }
3095
3096 free_aligned_buffer_page_end(orig_y);
3097 return max_diff;
3098 }
3099
// ScaleSamples with opt = false and a scale of 1.2 must show no difference.
TEST_F(LibYUVPlanarTest, TestScaleSamples_C) {
  const float kScale = 1.2f;
  const bool kUseOpt = false;
  const float diff = TestScaleSamples(benchmark_width_, benchmark_height_,
                                      benchmark_iterations_, kScale, kUseOpt);
  EXPECT_EQ(0, diff);
}
3105
// ScaleSamples with opt = true and a scale of 1.2 must show no difference.
TEST_F(LibYUVPlanarTest, TestScaleSamples_Opt) {
  const float kScale = 1.2f;
  const bool kUseOpt = true;
  const float diff = TestScaleSamples(benchmark_width_, benchmark_height_,
                                      benchmark_iterations_, kScale, kUseOpt);
  EXPECT_EQ(0, diff);
}
3111
TestCopySamples(int benchmark_width,int benchmark_height,int benchmark_iterations,bool opt)3112 float TestCopySamples(int benchmark_width,
3113 int benchmark_height,
3114 int benchmark_iterations,
3115 bool opt) {
3116 int i, j;
3117 // NEON does multiple of 16 floats, so round count up
3118 const int kPixels = (benchmark_width * benchmark_height + 15) & ~15;
3119 align_buffer_page_end(orig_y, kPixels * 4 * 3);
3120 uint8_t* dst_c = orig_y + kPixels * 4;
3121 uint8_t* dst_opt = orig_y + kPixels * 4 * 2;
3122
3123 // Randomize works but may contain some denormals affecting performance.
3124 // MemRandomize(orig_y, kPixels * 4);
3125 // large values are problematic. audio is really -1 to 1.
3126 for (i = 0; i < kPixels; ++i) {
3127 (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
3128 }
3129 memset(dst_c, 0, kPixels * 4);
3130 memset(dst_opt, 1, kPixels * 4);
3131
3132 memcpy(reinterpret_cast<void*>(dst_c), reinterpret_cast<void*>(orig_y),
3133 kPixels * 4);
3134
3135 for (j = 0; j < benchmark_iterations; j++) {
3136 if (opt) {
3137 #ifdef HAS_COPYROW_NEON
3138 CopyRow_NEON(orig_y, dst_opt, kPixels * 4);
3139 #else
3140 CopyRow_C(orig_y, dst_opt, kPixels * 4);
3141 #endif
3142 } else {
3143 CopyRow_C(orig_y, dst_opt, kPixels * 4);
3144 }
3145 }
3146
3147 float max_diff = 0.f;
3148 for (i = 0; i < kPixels; ++i) {
3149 float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
3150 (reinterpret_cast<float*>(dst_opt)[i]));
3151 if (abs_diff > max_diff) {
3152 max_diff = abs_diff;
3153 }
3154 }
3155
3156 free_aligned_buffer_page_end(orig_y);
3157 return max_diff;
3158 }
3159
// Copying samples with opt = false must reproduce the source exactly.
TEST_F(LibYUVPlanarTest, TestCopySamples_C) {
  const bool kUseOpt = false;
  const float diff = TestCopySamples(benchmark_width_, benchmark_height_,
                                     benchmark_iterations_, kUseOpt);
  EXPECT_EQ(0, diff);
}
3165
// Copying samples with opt = true must reproduce the source exactly.
TEST_F(LibYUVPlanarTest, TestCopySamples_Opt) {
  const bool kUseOpt = true;
  const float diff = TestCopySamples(benchmark_width_, benchmark_height_,
                                     benchmark_iterations_, kUseOpt);
  EXPECT_EQ(0, diff);
}
3171
3172 extern "C" void GaussRow_NEON(const uint32_t* src, uint16_t* dst, int width);
3173 extern "C" void GaussRow_C(const uint32_t* src, uint16_t* dst, int width);
3174
TEST_F(LibYUVPlanarTest,TestGaussRow_Opt)3175 TEST_F(LibYUVPlanarTest, TestGaussRow_Opt) {
3176 SIMD_ALIGNED(uint32_t orig_pixels[640 + 4]);
3177 SIMD_ALIGNED(uint16_t dst_pixels_c[640]);
3178 SIMD_ALIGNED(uint16_t dst_pixels_opt[640]);
3179
3180 memset(orig_pixels, 0, sizeof(orig_pixels));
3181 memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
3182 memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));
3183
3184 for (int i = 0; i < 640 + 4; ++i) {
3185 orig_pixels[i] = i * 256;
3186 }
3187 GaussRow_C(&orig_pixels[0], &dst_pixels_c[0], 640);
3188 for (int i = 0; i < benchmark_pixels_div1280_ * 2; ++i) {
3189 #if !defined(LIBYUV_DISABLE_NEON) && \
3190 (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
3191 int has_neon = TestCpuFlag(kCpuHasNEON);
3192 if (has_neon) {
3193 GaussRow_NEON(&orig_pixels[0], &dst_pixels_opt[0], 640);
3194 } else {
3195 GaussRow_C(&orig_pixels[0], &dst_pixels_opt[0], 640);
3196 }
3197 #else
3198 GaussRow_C(&orig_pixels[0], &dst_pixels_opt[0], 640);
3199 #endif
3200 }
3201
3202 for (int i = 0; i < 640; ++i) {
3203 EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
3204 }
3205
3206 EXPECT_EQ(dst_pixels_c[0],
3207 static_cast<uint16_t>(0 * 1 + 1 * 4 + 2 * 6 + 3 * 4 + 4 * 1));
3208 EXPECT_EQ(dst_pixels_c[639], static_cast<uint16_t>(10256));
3209 }
3210
3211 extern "C" void GaussCol_NEON(const uint16_t* src0,
3212 const uint16_t* src1,
3213 const uint16_t* src2,
3214 const uint16_t* src3,
3215 const uint16_t* src4,
3216 uint32_t* dst,
3217 int width);
3218
3219 extern "C" void GaussCol_C(const uint16_t* src0,
3220 const uint16_t* src1,
3221 const uint16_t* src2,
3222 const uint16_t* src3,
3223 const uint16_t* src4,
3224 uint32_t* dst,
3225 int width);
3226
TEST_F(LibYUVPlanarTest,TestGaussCol_Opt)3227 TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) {
3228 SIMD_ALIGNED(uint16_t orig_pixels[640 * 5]);
3229 SIMD_ALIGNED(uint32_t dst_pixels_c[640]);
3230 SIMD_ALIGNED(uint32_t dst_pixels_opt[640]);
3231
3232 memset(orig_pixels, 0, sizeof(orig_pixels));
3233 memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
3234 memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));
3235
3236 for (int i = 0; i < 640 * 5; ++i) {
3237 orig_pixels[i] = i;
3238 }
3239 GaussCol_C(&orig_pixels[0], &orig_pixels[640], &orig_pixels[640 * 2],
3240 &orig_pixels[640 * 3], &orig_pixels[640 * 4], &dst_pixels_c[0],
3241 640);
3242 for (int i = 0; i < benchmark_pixels_div1280_ * 2; ++i) {
3243 #if !defined(LIBYUV_DISABLE_NEON) && \
3244 (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
3245 int has_neon = TestCpuFlag(kCpuHasNEON);
3246 if (has_neon) {
3247 GaussCol_NEON(&orig_pixels[0], &orig_pixels[640], &orig_pixels[640 * 2],
3248 &orig_pixels[640 * 3], &orig_pixels[640 * 4],
3249 &dst_pixels_opt[0], 640);
3250 } else {
3251 GaussCol_C(&orig_pixels[0], &orig_pixels[640], &orig_pixels[640 * 2],
3252 &orig_pixels[640 * 3], &orig_pixels[640 * 4],
3253 &dst_pixels_opt[0], 640);
3254 }
3255 #else
3256 GaussCol_C(&orig_pixels[0], &orig_pixels[640], &orig_pixels[640 * 2],
3257 &orig_pixels[640 * 3], &orig_pixels[640 * 4], &dst_pixels_opt[0],
3258 640);
3259 #endif
3260 }
3261
3262 for (int i = 0; i < 640; ++i) {
3263 EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
3264 }
3265
3266 EXPECT_EQ(dst_pixels_c[0],
3267 static_cast<uint32_t>(0 * 1 + 640 * 4 + 640 * 2 * 6 + 640 * 3 * 4 +
3268 640 * 4 * 1));
3269 EXPECT_EQ(dst_pixels_c[639], static_cast<uint32_t>(30704));
3270 }
3271
TestFloatDivToByte(int benchmark_width,int benchmark_height,int benchmark_iterations,float scale,bool opt)3272 float TestFloatDivToByte(int benchmark_width,
3273 int benchmark_height,
3274 int benchmark_iterations,
3275 float scale,
3276 bool opt) {
3277 int i, j;
3278 // NEON does multiple of 8, so round count up
3279 const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
3280 align_buffer_page_end(src_weights, kPixels * 4);
3281 align_buffer_page_end(src_values, kPixels * 4);
3282 align_buffer_page_end(dst_out_c, kPixels);
3283 align_buffer_page_end(dst_out_opt, kPixels);
3284 align_buffer_page_end(dst_mask_c, kPixels);
3285 align_buffer_page_end(dst_mask_opt, kPixels);
3286
3287 // Randomize works but may contain some denormals affecting performance.
3288 // MemRandomize(orig_y, kPixels * 4);
3289 // large values are problematic. audio is really -1 to 1.
3290 for (i = 0; i < kPixels; ++i) {
3291 (reinterpret_cast<float*>(src_weights))[i] = scale;
3292 (reinterpret_cast<float*>(src_values))[i] =
3293 sinf(static_cast<float>(i) * 0.1f);
3294 }
3295 memset(dst_out_c, 0, kPixels);
3296 memset(dst_out_opt, 1, kPixels);
3297 memset(dst_mask_c, 2, kPixels);
3298 memset(dst_mask_opt, 3, kPixels);
3299
3300 FloatDivToByteRow_C(reinterpret_cast<float*>(src_weights),
3301 reinterpret_cast<float*>(src_values), dst_out_c,
3302 dst_mask_c, kPixels);
3303
3304 for (j = 0; j < benchmark_iterations; j++) {
3305 if (opt) {
3306 #ifdef HAS_FLOATDIVTOBYTEROW_NEON
3307 FloatDivToByteRow_NEON(reinterpret_cast<float*>(src_weights),
3308 reinterpret_cast<float*>(src_values), dst_out_opt,
3309 dst_mask_opt, kPixels);
3310 #else
3311 FloatDivToByteRow_C(reinterpret_cast<float*>(src_weights),
3312 reinterpret_cast<float*>(src_values), dst_out_opt,
3313 dst_mask_opt, kPixels);
3314 #endif
3315 } else {
3316 FloatDivToByteRow_C(reinterpret_cast<float*>(src_weights),
3317 reinterpret_cast<float*>(src_values), dst_out_opt,
3318 dst_mask_opt, kPixels);
3319 }
3320 }
3321
3322 uint8_t max_diff = 0;
3323 for (i = 0; i < kPixels; ++i) {
3324 uint8_t abs_diff = abs(dst_out_c[i] - dst_out_opt[i]) +
3325 abs(dst_mask_c[i] - dst_mask_opt[i]);
3326 if (abs_diff > max_diff) {
3327 max_diff = abs_diff;
3328 }
3329 }
3330
3331 free_aligned_buffer_page_end(src_weights);
3332 free_aligned_buffer_page_end(src_values);
3333 free_aligned_buffer_page_end(dst_out_c);
3334 free_aligned_buffer_page_end(dst_out_opt);
3335 free_aligned_buffer_page_end(dst_mask_c);
3336 free_aligned_buffer_page_end(dst_mask_opt);
3337
3338 return max_diff;
3339 }
3340
// FloatDivToByte with opt = false and a weight of 1.2 must show no difference.
TEST_F(LibYUVPlanarTest, TestFloatDivToByte_C) {
  const float kScale = 1.2f;
  const bool kUseOpt = false;
  const float diff = TestFloatDivToByte(benchmark_width_, benchmark_height_,
                                        benchmark_iterations_, kScale, kUseOpt);
  EXPECT_EQ(0, diff);
}
3346
// FloatDivToByte with opt = true and a weight of 1.2 must show no difference.
TEST_F(LibYUVPlanarTest, TestFloatDivToByte_Opt) {
  const float kScale = 1.2f;
  const bool kUseOpt = true;
  const float diff = TestFloatDivToByte(benchmark_width_, benchmark_height_,
                                        benchmark_iterations_, kScale, kUseOpt);
  EXPECT_EQ(0, diff);
}
3352
// Verifies that UVToVURow_C swaps the two bytes of every pair: output byte 0
// of each pair must equal input byte 1 and vice versa.
TEST_F(LibYUVPlanarTest, UVToVURow) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kPairBytes = kPixels * 2;  // Two bytes per pixel pair.

  align_buffer_page_end(src_pixels_vu, kPairBytes);
  align_buffer_page_end(dst_pixels_uv, kPairBytes);

  MemRandomize(src_pixels_vu, kPairBytes);
  // Known fill so an unwritten destination is unlikely to match.
  memset(dst_pixels_uv, 1, kPairBytes);

  UVToVURow_C(src_pixels_vu, dst_pixels_uv, kPixels);

  int i = 0;
  while (i < kPixels) {
    EXPECT_EQ(dst_pixels_uv[i * 2 + 0], src_pixels_vu[i * 2 + 1]);
    EXPECT_EQ(dst_pixels_uv[i * 2 + 1], src_pixels_vu[i * 2 + 0]);
    ++i;
  }

  free_aligned_buffer_page_end(src_pixels_vu);
  free_aligned_buffer_page_end(dst_pixels_uv);
}
3371
3372 } // namespace libyuv
3373