1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <stdlib.h>
12 #include <time.h>
13
14 #include "../unit_test/unit_test.h"
15 #include "libyuv/compare.h"
16 #include "libyuv/convert.h"
17 #include "libyuv/convert_argb.h"
18 #include "libyuv/convert_from.h"
19 #include "libyuv/convert_from_argb.h"
20 #include "libyuv/cpu_id.h"
21 #include "libyuv/planar_functions.h"
22 #include "libyuv/rotate.h"
23
24 namespace libyuv {
25
// Exercises ARGBUnattenuate on four hand-picked pixels, then verifies that
// attenuate -> unattenuate -> attenuate reproduces the first attenuation to
// within 2 per byte, and spot-checks attenuated values at alpha 0, 128 and 255.
TEST_F(LibYUVPlanarTest, TestAttenuate) {
  const int kSize = 1280 * 4;
  align_buffer_page_end(orig_pixels, kSize);
  align_buffer_page_end(atten_pixels, kSize);
  align_buffer_page_end(unatten_pixels, kSize);
  align_buffer_page_end(atten2_pixels, kSize);

  // Inputs for the unattenuate check, 4 bytes per pixel.
  static const uint8 kUnattenInput[4][4] = {
      {200u, 129u, 127u, 128u},  // Test unattenuation clamps.
      {16u, 64u, 192u, 0u},      // Transparent.
      {16u, 64u, 192u, 255u},    // Opaque is unaffected.
      {16u, 64u, 192u, 128u},
  };
  // Expected ARGBUnattenuate output for each input pixel above.
  static const unsigned int kUnattenExpected[4][4] = {
      {255u, 255u, 254u, 128u},
      {0u, 0u, 0u, 0u},
      {16u, 64u, 192u, 255u},
      {32u, 128u, 255u, 128u},
  };
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[p * 4 + c] = kUnattenInput[p][c];
    }
  }
  ARGBUnattenuate(orig_pixels, 0, unatten_pixels, 0, 4, 1);
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kUnattenExpected[p][c], unatten_pixels[p * 4 + c])
          << "pixel " << p << " channel " << c;
    }
  }

  // Ramp pattern; the int values are truncated to 8 bits on store.
  for (int i = 0; i < 1280; ++i) {
    uint8* const px = orig_pixels + i * 4;
    px[0] = i;
    px[1] = i / 2;
    px[2] = i / 3;
    px[3] = i;
  }
  ARGBAttenuate(orig_pixels, 0, atten_pixels, 0, 1280, 1);
  ARGBUnattenuate(atten_pixels, 0, unatten_pixels, 0, 1280, 1);
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    ARGBAttenuate(unatten_pixels, 0, atten2_pixels, 0, 1280, 1);
  }
  // Every byte of the second attenuation must be within 2 of the first.
  for (int n = 0; n < kSize; ++n) {
    EXPECT_NEAR(atten_pixels[n], atten2_pixels[n], 2);
  }

  // Make sure transparent, 50% and opaque are fully accurate.
  const uint8* const transparent = atten_pixels + 0 * 4;
  const uint8* const half = atten_pixels + 128 * 4;
  const uint8* const opaque = atten_pixels + 255 * 4;
  EXPECT_EQ(0, transparent[0]);
  EXPECT_EQ(0, transparent[1]);
  EXPECT_EQ(0, transparent[2]);
  EXPECT_EQ(0, transparent[3]);
  EXPECT_EQ(64, half[0]);
  EXPECT_EQ(32, half[1]);
  EXPECT_EQ(21, half[2]);
  EXPECT_EQ(128, half[3]);
  EXPECT_NEAR(255, opaque[0], 1);
  EXPECT_NEAR(127, opaque[1], 1);
  EXPECT_NEAR(85, opaque[2], 1);
  EXPECT_EQ(255, opaque[3]);

  free_aligned_buffer_page_end(atten2_pixels);
  free_aligned_buffer_page_end(unatten_pixels);
  free_aligned_buffer_page_end(atten_pixels);
  free_aligned_buffer_page_end(orig_pixels);
}
105
TestAttenuateI(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)106 static int TestAttenuateI(int width,
107 int height,
108 int benchmark_iterations,
109 int disable_cpu_flags,
110 int benchmark_cpu_info,
111 int invert,
112 int off) {
113 if (width < 1) {
114 width = 1;
115 }
116 const int kBpp = 4;
117 const int kStride = width * kBpp;
118 align_buffer_page_end(src_argb, kStride * height + off);
119 align_buffer_page_end(dst_argb_c, kStride * height);
120 align_buffer_page_end(dst_argb_opt, kStride * height);
121 for (int i = 0; i < kStride * height; ++i) {
122 src_argb[i + off] = (fastrand() & 0xff);
123 }
124 memset(dst_argb_c, 0, kStride * height);
125 memset(dst_argb_opt, 0, kStride * height);
126
127 MaskCpuFlags(disable_cpu_flags);
128 ARGBAttenuate(src_argb + off, kStride, dst_argb_c, kStride, width,
129 invert * height);
130 MaskCpuFlags(benchmark_cpu_info);
131 for (int i = 0; i < benchmark_iterations; ++i) {
132 ARGBAttenuate(src_argb + off, kStride, dst_argb_opt, kStride, width,
133 invert * height);
134 }
135 int max_diff = 0;
136 for (int i = 0; i < kStride * height; ++i) {
137 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
138 static_cast<int>(dst_argb_opt[i]));
139 if (abs_diff > max_diff) {
140 max_diff = abs_diff;
141 }
142 }
143 free_aligned_buffer_page_end(src_argb);
144 free_aligned_buffer_page_end(dst_argb_c);
145 free_aligned_buffer_page_end(dst_argb_opt);
146 return max_diff;
147 }
148
// ARGBAttenuate comparison at one pixel less than the benchmark width.
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Any) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int diff = TestAttenuateI(benchmark_width_ - 1, benchmark_height_,
                                  benchmark_iterations_, disable_cpu_flags_,
                                  benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(diff, 2);
}
155
// ARGBAttenuate comparison with the source pointer offset by one byte.
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int diff = TestAttenuateI(benchmark_width_, benchmark_height_,
                                  benchmark_iterations_, disable_cpu_flags_,
                                  benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(diff, 2);
}
162
// ARGBAttenuate comparison with a negated height passed to the function.
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int diff = TestAttenuateI(benchmark_width_, benchmark_height_,
                                  benchmark_iterations_, disable_cpu_flags_,
                                  benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(diff, 2);
}
169
// ARGBAttenuate comparison at the benchmark size, no offset, positive height.
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int diff = TestAttenuateI(benchmark_width_, benchmark_height_,
                                  benchmark_iterations_, disable_cpu_flags_,
                                  benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(diff, 2);
}
176
TestUnattenuateI(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)177 static int TestUnattenuateI(int width,
178 int height,
179 int benchmark_iterations,
180 int disable_cpu_flags,
181 int benchmark_cpu_info,
182 int invert,
183 int off) {
184 if (width < 1) {
185 width = 1;
186 }
187 const int kBpp = 4;
188 const int kStride = width * kBpp;
189 align_buffer_page_end(src_argb, kStride * height + off);
190 align_buffer_page_end(dst_argb_c, kStride * height);
191 align_buffer_page_end(dst_argb_opt, kStride * height);
192 for (int i = 0; i < kStride * height; ++i) {
193 src_argb[i + off] = (fastrand() & 0xff);
194 }
195 ARGBAttenuate(src_argb + off, kStride, src_argb + off, kStride, width,
196 height);
197 memset(dst_argb_c, 0, kStride * height);
198 memset(dst_argb_opt, 0, kStride * height);
199
200 MaskCpuFlags(disable_cpu_flags);
201 ARGBUnattenuate(src_argb + off, kStride, dst_argb_c, kStride, width,
202 invert * height);
203 MaskCpuFlags(benchmark_cpu_info);
204 for (int i = 0; i < benchmark_iterations; ++i) {
205 ARGBUnattenuate(src_argb + off, kStride, dst_argb_opt, kStride, width,
206 invert * height);
207 }
208 int max_diff = 0;
209 for (int i = 0; i < kStride * height; ++i) {
210 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
211 static_cast<int>(dst_argb_opt[i]));
212 if (abs_diff > max_diff) {
213 max_diff = abs_diff;
214 }
215 }
216 free_aligned_buffer_page_end(src_argb);
217 free_aligned_buffer_page_end(dst_argb_c);
218 free_aligned_buffer_page_end(dst_argb_opt);
219 return max_diff;
220 }
221
// ARGBUnattenuate comparison at one pixel less than the benchmark width.
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Any) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int diff = TestUnattenuateI(benchmark_width_ - 1, benchmark_height_,
                                    benchmark_iterations_, disable_cpu_flags_,
                                    benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(diff, 2);
}
228
// ARGBUnattenuate comparison with the source pointer offset by one byte.
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
                                    benchmark_iterations_, disable_cpu_flags_,
                                    benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(diff, 2);
}
235
// ARGBUnattenuate comparison with a negated height passed to the function.
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
                                    benchmark_iterations_, disable_cpu_flags_,
                                    benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(diff, 2);
}
242
// ARGBUnattenuate comparison at the benchmark size, no offset, positive height.
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
                                    benchmark_iterations_, disable_cpu_flags_,
                                    benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(diff, 2);
}
249
// Fills a 16x16 image with the constant pixel (1, 2, 3, 255) and checks that
// each cumulative-sum entry equals the channel value times the number of
// pixels in the rectangle from the origin through (x, y) inclusive.
TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) {
  SIMD_ALIGNED(uint8 orig_pixels[16][16][4]);
  SIMD_ALIGNED(int32 added_pixels[16][16][4]);

  // Visit the 256 pixels in row-major order.
  for (int n = 0; n < 16 * 16; ++n) {
    uint8* const px = orig_pixels[n / 16][n % 16];
    px[0] = 1u;
    px[1] = 2u;
    px[2] = 3u;
    px[3] = 255u;
  }

  ARGBComputeCumulativeSum(&orig_pixels[0][0][0], 16 * 4,
                           &added_pixels[0][0][0], 16 * 4, 16, 16);

  for (int row = 0; row < 16; ++row) {
    for (int col = 0; col < 16; ++col) {
      // Pixel count of the rectangle ending at (col, row).
      const int area = (col + 1) * (row + 1);
      const int32* const sum = added_pixels[row][col];
      EXPECT_EQ(area, sum[0]);
      EXPECT_EQ(area * 2, sum[1]);
      EXPECT_EQ(area * 3, sum[2]);
      EXPECT_EQ(area * 255, sum[3]);
    }
  }
}
275
// Converts six known pixels to gray in place with ARGBGray and checks the
// exact output, then runs ARGBGray repeatedly on a 1280-pixel ramp.
TEST_F(LibYUVPlanarTest, TestARGBGray) {
  SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  static const uint8 kInput[6][4] = {
      {255u, 0u, 0u, 128u},      // Test blue
      {0u, 255u, 0u, 0u},        // Test green
      {0u, 0u, 255u, 255u},      // Test red
      {0u, 0u, 0u, 255u},        // Test black
      {255u, 255u, 255u, 255u},  // Test white
      {16u, 64u, 192u, 224u},    // Test color
  };
  // Expected output: the first three bytes share one gray value and the
  // fourth byte equals the input's fourth byte.
  static const unsigned int kExpected[6][4] = {
      {30u, 30u, 30u, 128u},     {149u, 149u, 149u, 0u},
      {76u, 76u, 76u, 255u},     {0u, 0u, 0u, 255u},
      {255u, 255u, 255u, 255u},  {96u, 96u, 96u, 224u},
  };
  for (int p = 0; p < 6; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[p][c] = kInput[p][c];
    }
  }
  // Do 16 to test asm version.
  ARGBGray(&orig_pixels[0][0], 0, 0, 0, 16, 1);
  for (int p = 0; p < 6; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpected[p][c], orig_pixels[p][c])
          << "pixel " << p << " channel " << c;
    }
  }

  // Ramp pattern; the int values are truncated to 8 bits on store.
  for (int i = 0; i < 1280; ++i) {
    uint8* const px = orig_pixels[i];
    px[0] = i;
    px[1] = i / 2;
    px[2] = i / 3;
    px[3] = i;
  }
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    ARGBGray(&orig_pixels[0][0], 0, 0, 0, 1280, 1);
  }
}
346
// Converts six known pixels to gray with ARGBGrayTo into a separate buffer
// and checks the exact output, then runs ARGBGrayTo repeatedly on a
// 1280-pixel ramp.
TEST_F(LibYUVPlanarTest, TestARGBGrayTo) {
  SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
  SIMD_ALIGNED(uint8 gray_pixels[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  static const uint8 kInput[6][4] = {
      {255u, 0u, 0u, 128u},      // Test blue
      {0u, 255u, 0u, 0u},        // Test green
      {0u, 0u, 255u, 255u},      // Test red
      {0u, 0u, 0u, 255u},        // Test black
      {255u, 255u, 255u, 255u},  // Test white
      {16u, 64u, 192u, 224u},    // Test color
  };
  // Expected output: the first three bytes share one gray value and the
  // fourth byte equals the input's fourth byte.
  static const unsigned int kExpected[6][4] = {
      {30u, 30u, 30u, 128u},     {149u, 149u, 149u, 0u},
      {76u, 76u, 76u, 255u},     {0u, 0u, 0u, 255u},
      {255u, 255u, 255u, 255u},  {96u, 96u, 96u, 224u},
  };
  for (int p = 0; p < 6; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[p][c] = kInput[p][c];
    }
  }
  // Do 16 to test asm version.
  ARGBGrayTo(&orig_pixels[0][0], 0, &gray_pixels[0][0], 0, 16, 1);
  for (int p = 0; p < 6; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpected[p][c], gray_pixels[p][c])
          << "pixel " << p << " channel " << c;
    }
  }

  // Ramp pattern; the int values are truncated to 8 bits on store.
  for (int i = 0; i < 1280; ++i) {
    uint8* const px = orig_pixels[i];
    px[0] = i;
    px[1] = i / 2;
    px[2] = i / 3;
    px[3] = i;
  }
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    ARGBGrayTo(&orig_pixels[0][0], 0, &gray_pixels[0][0], 0, 1280, 1);
  }
}
418
// Applies ARGBSepia in place to six known pixels and checks the exact
// output, then runs ARGBSepia repeatedly on a 1280-pixel ramp.
TEST_F(LibYUVPlanarTest, TestARGBSepia) {
  SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  static const uint8 kInput[6][4] = {
      {255u, 0u, 0u, 128u},      // Test blue
      {0u, 255u, 0u, 0u},        // Test green
      {0u, 0u, 255u, 255u},      // Test red
      {0u, 0u, 0u, 255u},        // Test black
      {255u, 255u, 255u, 255u},  // Test white
      {16u, 64u, 192u, 224u},    // Test color
  };
  // Expected output per input pixel; the fourth byte equals the input's.
  static const unsigned int kExpected[6][4] = {
      {33u, 43u, 47u, 128u},     {135u, 175u, 195u, 0u},
      {69u, 89u, 99u, 255u},     {0u, 0u, 0u, 255u},
      {239u, 255u, 255u, 255u},  {88u, 114u, 127u, 224u},
  };
  for (int p = 0; p < 6; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[p][c] = kInput[p][c];
    }
  }
  // Do 16 to test asm version.
  ARGBSepia(&orig_pixels[0][0], 0, 0, 0, 16, 1);
  for (int p = 0; p < 6; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpected[p][c], orig_pixels[p][c])
          << "pixel " << p << " channel " << c;
    }
  }

  // Ramp pattern; the int values are truncated to 8 bits on store.
  for (int i = 0; i < 1280; ++i) {
    uint8* const px = orig_pixels[i];
    px[0] = i;
    px[1] = i / 2;
    px[2] = i / 3;
    px[3] = i;
  }
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    ARGBSepia(&orig_pixels[0][0], 0, 0, 0, 1280, 1);
  }
}
490
// Applies a sepia matrix with ARGBColorMatrix to four known pixels and checks
// the exact output, then checks that the run under disable_cpu_flags_ and the
// runs under benchmark_cpu_info_ agree byte-for-byte on a 1280-pixel ramp.
TEST_F(LibYUVPlanarTest, TestARGBColorMatrix) {
  SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
  SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]);
  SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]);

  // Matrix for Sepia.
  SIMD_ALIGNED(static const int8 kRGBToSepia[]) = {
      17 / 2, 68 / 2, 35 / 2, 0,   //
      22 / 2, 88 / 2, 45 / 2, 0,   //
      24 / 2, 98 / 2, 50 / 2, 0,   //
      0,      0,      0,      64,  // Copy alpha.
  };
  memset(orig_pixels, 0, sizeof(orig_pixels));

  static const uint8 kInput[4][4] = {
      {255u, 0u, 0u, 128u},    // Test blue
      {0u, 255u, 0u, 0u},      // Test green
      {0u, 0u, 255u, 255u},    // Test red
      {16u, 64u, 192u, 224u},  // Test color
  };
  static const unsigned int kExpected[4][4] = {
      {31u, 43u, 47u, 128u},
      {135u, 175u, 195u, 0u},
      {67u, 87u, 99u, 255u},
      {87u, 112u, 127u, 224u},
  };
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[p][c] = kInput[p][c];
    }
  }
  // Do 16 to test asm version.
  ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
                  &kRGBToSepia[0], 16, 1);
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpected[p][c], dst_pixels_opt[p][c])
          << "pixel " << p << " channel " << c;
    }
  }

  // Ramp pattern; the int values are truncated to 8 bits on store.
  for (int i = 0; i < 1280; ++i) {
    uint8* const px = orig_pixels[i];
    px[0] = i;
    px[1] = i / 2;
    px[2] = i / 3;
    px[3] = i;
  }

  // Single run with the disable_cpu_flags_ mask.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0,
                  &kRGBToSepia[0], 1280, 1);

  // Repeated runs with the benchmark_cpu_info_ mask.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
                    &kRGBToSepia[0], 1280, 1);
  }

  for (int i = 0; i < 1280; ++i) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(dst_pixels_c[i][c], dst_pixels_opt[i][c])
          << "pixel " << i << " channel " << c;
    }
  }
}
566
// Applies a sepia matrix in place with RGBColorMatrix to four known pixels
// and checks the exact output (the fourth byte of each pixel is expected to
// be unchanged), then runs it repeatedly on a 1280-pixel ramp.
TEST_F(LibYUVPlanarTest, TestRGBColorMatrix) {
  SIMD_ALIGNED(uint8 orig_pixels[1280][4]);

  // Matrix for Sepia.
  SIMD_ALIGNED(static const int8 kRGBToSepia[]) = {
      17, 68, 35, 0,  //
      22, 88, 45, 0,  //
      24, 98, 50, 0,  //
      0,  0,  0,  0,  // Unused but makes matrix 16 bytes.
  };
  memset(orig_pixels, 0, sizeof(orig_pixels));

  static const uint8 kInput[4][4] = {
      {255u, 0u, 0u, 128u},    // Test blue
      {0u, 255u, 0u, 0u},      // Test green
      {0u, 0u, 255u, 255u},    // Test red
      {16u, 64u, 192u, 224u},  // Test color
  };
  static const unsigned int kExpected[4][4] = {
      {31u, 43u, 47u, 128u},
      {135u, 175u, 195u, 0u},
      {67u, 87u, 99u, 255u},
      {87u, 112u, 127u, 224u},
  };
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[p][c] = kInput[p][c];
    }
  }
  // Do 16 to test asm version.
  RGBColorMatrix(&orig_pixels[0][0], 0, &kRGBToSepia[0], 0, 0, 16, 1);
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpected[p][c], orig_pixels[p][c])
          << "pixel " << p << " channel " << c;
    }
  }

  // Ramp pattern; the int values are truncated to 8 bits on store.
  for (int i = 0; i < 1280; ++i) {
    uint8* const px = orig_pixels[i];
    px[0] = i;
    px[1] = i / 2;
    px[2] = i / 3;
    px[3] = i;
  }
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    RGBColorMatrix(&orig_pixels[0][0], 0, &kRGBToSepia[0], 0, 0, 1280, 1);
  }
}
626
// Maps four known pixels through a lookup table in place with ARGBColorTable
// and checks the exact output, then runs it repeatedly on a 1280-pixel ramp.
TEST_F(LibYUVPlanarTest, TestARGBColorTable) {
  SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  // Lookup table with 4 bytes per entry. Only the first four entries are
  // given; the remaining elements are zero-initialized.
  static const uint8 kARGBTable[256 * 4] = {
      1u,  2u,  3u,  4u,   // Entry 0.
      5u,  6u,  7u,  8u,   // Entry 1.
      9u,  10u, 11u, 12u,  // Entry 2.
      13u, 14u, 15u, 16u,  // Entry 3.
  };

  static const uint8 kInput[4][4] = {
      {0u, 0u, 0u, 0u},
      {1u, 1u, 1u, 1u},
      {2u, 2u, 2u, 2u},
      {0u, 1u, 2u, 3u},
  };
  // Channel c with input value v is expected to become table entry v, byte c.
  static const unsigned int kExpected[4][4] = {
      {1u, 2u, 3u, 4u},
      {5u, 6u, 7u, 8u},
      {9u, 10u, 11u, 12u},
      {1u, 6u, 11u, 16u},
  };
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[p][c] = kInput[p][c];
    }
  }
  // Do 16 to test asm version.
  ARGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 16, 1);
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpected[p][c], orig_pixels[p][c])
          << "pixel " << p << " channel " << c;
    }
  }

  // Ramp pattern; the int values are truncated to 8 bits on store.
  for (int i = 0; i < 1280; ++i) {
    uint8* const px = orig_pixels[i];
    px[0] = i;
    px[1] = i / 2;
    px[2] = i / 3;
    px[3] = i;
  }
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    ARGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 1280, 1);
  }
}
681
682 // Same as TestARGBColorTable except alpha does not change.
TEST_F(LibYUVPlanarTest,TestRGBColorTable)683 TEST_F(LibYUVPlanarTest, TestRGBColorTable) {
684 SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
685 memset(orig_pixels, 0, sizeof(orig_pixels));
686
687 // Matrix for Sepia.
688 static const uint8 kARGBTable[256 * 4] = {
689 1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u, 16u,
690 };
691
692 orig_pixels[0][0] = 0u;
693 orig_pixels[0][1] = 0u;
694 orig_pixels[0][2] = 0u;
695 orig_pixels[0][3] = 0u;
696 orig_pixels[1][0] = 1u;
697 orig_pixels[1][1] = 1u;
698 orig_pixels[1][2] = 1u;
699 orig_pixels[1][3] = 1u;
700 orig_pixels[2][0] = 2u;
701 orig_pixels[2][1] = 2u;
702 orig_pixels[2][2] = 2u;
703 orig_pixels[2][3] = 2u;
704 orig_pixels[3][0] = 0u;
705 orig_pixels[3][1] = 1u;
706 orig_pixels[3][2] = 2u;
707 orig_pixels[3][3] = 3u;
708 // Do 16 to test asm version.
709 RGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 16, 1);
710 EXPECT_EQ(1u, orig_pixels[0][0]);
711 EXPECT_EQ(2u, orig_pixels[0][1]);
712 EXPECT_EQ(3u, orig_pixels[0][2]);
713 EXPECT_EQ(0u, orig_pixels[0][3]); // Alpha unchanged.
714 EXPECT_EQ(5u, orig_pixels[1][0]);
715 EXPECT_EQ(6u, orig_pixels[1][1]);
716 EXPECT_EQ(7u, orig_pixels[1][2]);
717 EXPECT_EQ(1u, orig_pixels[1][3]); // Alpha unchanged.
718 EXPECT_EQ(9u, orig_pixels[2][0]);
719 EXPECT_EQ(10u, orig_pixels[2][1]);
720 EXPECT_EQ(11u, orig_pixels[2][2]);
721 EXPECT_EQ(2u, orig_pixels[2][3]); // Alpha unchanged.
722 EXPECT_EQ(1u, orig_pixels[3][0]);
723 EXPECT_EQ(6u, orig_pixels[3][1]);
724 EXPECT_EQ(11u, orig_pixels[3][2]);
725 EXPECT_EQ(3u, orig_pixels[3][3]); // Alpha unchanged.
726
727 for (int i = 0; i < 1280; ++i) {
728 orig_pixels[i][0] = i;
729 orig_pixels[i][1] = i / 2;
730 orig_pixels[i][2] = i / 3;
731 orig_pixels[i][3] = i;
732 }
733 for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
734 RGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 1280, 1);
735 }
736 }
737
// Quantizes a 1280-pixel ramp in place with ARGBQuantize and checks that the
// first three channels are rounded down to a multiple of 8 plus 4 while the
// fourth channel is unchanged; then runs ARGBQuantize repeatedly.
TEST_F(LibYUVPlanarTest, TestARGBQuantize) {
  SIMD_ALIGNED(uint8 orig_pixels[1280][4]);

  const int kInterval = 8;
  const int kOffset = kInterval / 2;
  const int kScale = (65536 + kOffset) / kInterval;

  // Ramp pattern; the int values are truncated to 8 bits on store.
  for (int i = 0; i < 1280; ++i) {
    uint8* const px = orig_pixels[i];
    px[0] = i;
    px[1] = i / 2;
    px[2] = i / 3;
    px[3] = i;
  }
  ARGBQuantize(&orig_pixels[0][0], 0, kScale, kInterval, kOffset, 0, 0, 1280,
               1);

  for (int i = 0; i < 1280; ++i) {
    const uint8* const px = orig_pixels[i];
    EXPECT_EQ((i / kInterval * kInterval + kOffset) & 255, px[0]);
    EXPECT_EQ((i / 2 / kInterval * kInterval + kOffset) & 255, px[1]);
    EXPECT_EQ((i / 3 / kInterval * kInterval + kOffset) & 255, px[2]);
    EXPECT_EQ(i & 255, px[3]);
  }
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    ARGBQuantize(&orig_pixels[0][0], 0, kScale, kInterval, kOffset, 0, 0, 1280,
                 1);
  }
}
761
// Mirrors a 1280-pixel row with ARGBMirror and checks that source pixel i
// ends up at destination index 1279 - i with its four bytes in the same
// order; then runs ARGBMirror repeatedly.
TEST_F(LibYUVPlanarTest, TestARGBMirror) {
  SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
  SIMD_ALIGNED(uint8 dst_pixels[1280][4]);

  // Each channel uses a different divisor so the four bytes of a pixel differ.
  for (int i = 0; i < 1280; ++i) {
    uint8* const px = orig_pixels[i];
    px[0] = i;
    px[1] = i / 2;
    px[2] = i / 3;
    px[3] = i / 4;
  }
  ARGBMirror(&orig_pixels[0][0], 0, &dst_pixels[0][0], 0, 1280, 1);

  for (int i = 0; i < 1280; ++i) {
    const uint8* const mirrored = dst_pixels[1280 - 1 - i];
    EXPECT_EQ(i & 255, mirrored[0]);
    EXPECT_EQ((i / 2) & 255, mirrored[1]);
    EXPECT_EQ((i / 3) & 255, mirrored[2]);
    EXPECT_EQ((i / 4) & 255, mirrored[3]);
  }
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    ARGBMirror(&orig_pixels[0][0], 0, &dst_pixels[0][0], 0, 1280, 1);
  }
}
784
// Runs ARGBShade on four known pixels with three shade values and checks the
// exact output, then runs ARGBShade repeatedly on 1280 pixels.
TEST_F(LibYUVPlanarTest, TestShade) {
  SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
  SIMD_ALIGNED(uint8 shade_pixels[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  static const uint8 kInput[4][4] = {
      {10u, 20u, 40u, 80u},
      {0u, 0u, 0u, 255u},
      {0u, 0u, 0u, 0u},
      {0u, 0u, 0u, 0u},
  };
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[p][c] = kInput[p][c];
    }
  }

  // Shade 0x80ffffff: only the fourth byte of each pixel is expected to halve.
  static const unsigned int kExpectedAlphaHalf[4][4] = {
      {10u, 20u, 40u, 40u},
      {0u, 0u, 0u, 128u},
      {0u, 0u, 0u, 0u},
      {0u, 0u, 0u, 0u},
  };
  // Do 8 pixels to allow opt version to be used.
  ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 8, 1, 0x80ffffff);
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpectedAlphaHalf[p][c], shade_pixels[p][c])
          << "pixel " << p << " channel " << c;
    }
  }

  // Shade 0x80808080: every byte of the first pixel is expected to halve.
  static const unsigned int kExpectedAllHalf[4] = {5u, 10u, 20u, 40u};
  ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 8, 1, 0x80808080);
  for (int c = 0; c < 4; ++c) {
    EXPECT_EQ(kExpectedAllHalf[c], shade_pixels[0][c]) << "channel " << c;
  }

  // Shade 0x10204080: every byte of the first pixel is expected to become 5.
  ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 8, 1, 0x10204080);
  for (int c = 0; c < 4; ++c) {
    EXPECT_EQ(5u, shade_pixels[0][c]) << "channel " << c;
  }

  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 1280, 1,
              0x80808080);
  }
}
842
// Blends two 4-pixel rows with ARGBInterpolate at fractions 128, 0 and 192
// and checks the exact output, then runs it repeatedly on 1280 pixels.
TEST_F(LibYUVPlanarTest, TestARGBInterpolate) {
  SIMD_ALIGNED(uint8 orig_pixels_0[1280][4]);
  SIMD_ALIGNED(uint8 orig_pixels_1[1280][4]);
  SIMD_ALIGNED(uint8 interpolate_pixels[1280][4]);
  memset(orig_pixels_0, 0, sizeof(orig_pixels_0));
  memset(orig_pixels_1, 0, sizeof(orig_pixels_1));

  // First source row.
  static const uint8 kInput0[4][4] = {
      {16u, 32u, 64u, 128u},
      {0u, 0u, 0u, 255u},
      {0u, 0u, 0u, 0u},
      {0u, 0u, 0u, 0u},
  };
  // Second source row.
  static const uint8 kInput1[4][4] = {
      {0u, 0u, 0u, 0u},
      {0u, 0u, 0u, 0u},
      {0u, 0u, 0u, 0u},
      {255u, 255u, 255u, 255u},
  };
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels_0[p][c] = kInput0[p][c];
      orig_pixels_1[p][c] = kInput1[p][c];
    }
  }

  // Fraction 128.
  static const unsigned int kExpectedHalf[4][4] = {
      {8u, 16u, 32u, 64u},
      {0u, 0u, 0u, 128u},
      {0u, 0u, 0u, 0u},
      {128u, 128u, 128u, 128u},
  };
  ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
                  &interpolate_pixels[0][0], 0, 4, 1, 128);
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpectedHalf[p][c], interpolate_pixels[p][c])
          << "pixel " << p << " channel " << c;
    }
  }

  // Fraction 0: the first pixel is expected to equal the first source row.
  static const unsigned int kExpectedZero[4] = {16u, 32u, 64u, 128u};
  ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
                  &interpolate_pixels[0][0], 0, 4, 1, 0);
  for (int c = 0; c < 4; ++c) {
    EXPECT_EQ(kExpectedZero[c], interpolate_pixels[0][c]) << "channel " << c;
  }

  // Fraction 192: the first pixel is expected to be a quarter of the first
  // source row's first pixel.
  static const unsigned int kExpectedThreeQuarter[4] = {4u, 8u, 16u, 32u};
  ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
                  &interpolate_pixels[0][0], 0, 4, 1, 192);
  for (int c = 0; c < 4; ++c) {
    EXPECT_EQ(kExpectedThreeQuarter[c], interpolate_pixels[0][c])
        << "channel " << c;
  }

  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
                    &interpolate_pixels[0][0], 0, 1280, 1, 128);
  }
}
923
// Checks InterpolatePlane() on the first 16 bytes of two 1280 byte rows at
// interpolation values 128, 0 and 192, then runs it across the full row
// benchmark_pixels_div1280_ times.
TEST_F(LibYUVPlanarTest, TestInterpolatePlane) {
  enum { kPatternBytes = 16, kSpotChecks = 4 };
  // Leading bytes of each source row; every other byte of the rows is zero.
  static const uint8 kFirstPattern[kPatternBytes] = {
      16, 32, 64, 128, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 0};
  static const uint8 kSecondPattern[kPatternBytes] = {
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255};
  // Expected output bytes for each interpolation value that is checked.
  static const unsigned int kExpectedAt128[kPatternBytes] = {
      8u, 16u, 32u, 64u, 0u, 0u, 0u, 128u,
      0u, 0u, 0u, 0u, 128u, 128u, 128u, 128u};
  static const unsigned int kExpectedAt0[kSpotChecks] = {16u, 32u, 64u, 128u};
  static const unsigned int kExpectedAt192[kSpotChecks] = {4u, 8u, 16u, 32u};

  SIMD_ALIGNED(uint8 first_row[1280]);
  SIMD_ALIGNED(uint8 second_row[1280]);
  SIMD_ALIGNED(uint8 blended_row[1280]);
  memset(first_row, 0, sizeof(first_row));
  memset(second_row, 0, sizeof(second_row));
  for (int n = 0; n < kPatternBytes; ++n) {
    first_row[n] = kFirstPattern[n];
    second_row[n] = kSecondPattern[n];
  }

  // Interpolation 128: all 16 pattern bytes are verified.
  InterpolatePlane(&first_row[0], 0, &second_row[0], 0, &blended_row[0], 0,
                   kPatternBytes, 1, 128);
  for (int n = 0; n < kPatternBytes; ++n) {
    EXPECT_EQ(kExpectedAt128[n], blended_row[n]);
  }

  // Interpolation 0: the first four bytes must equal the first source row.
  InterpolatePlane(&first_row[0], 0, &second_row[0], 0, &blended_row[0], 0,
                   kPatternBytes, 1, 0);
  for (int n = 0; n < kSpotChecks; ++n) {
    EXPECT_EQ(kExpectedAt0[n], blended_row[n]);
  }

  // Interpolation 192: only the first four bytes are verified.
  InterpolatePlane(&first_row[0], 0, &second_row[0], 0, &blended_row[0], 0,
                   kPatternBytes, 1, 192);
  for (int n = 0; n < kSpotChecks; ++n) {
    EXPECT_EQ(kExpectedAt192[n], blended_row[n]);
  }

  // Benchmark pass over the whole 1280 byte row; results are not checked.
  for (int pass = 0; pass < benchmark_pixels_div1280_; ++pass) {
    InterpolatePlane(&first_row[0], 0, &second_row[0], 0, &blended_row[0], 0,
                     1280, 1, 123);
  }
}
1004
// Generates a test named ARGBInterpolate<TERP><N>. The test fills two source
// buffers with random bytes, runs ARGBInterpolate() once with
// disable_cpu_flags_ and benchmark_iterations_ times with benchmark_cpu_info_,
// and expects both destinations to match byte for byte.
//   W1280    - width in pixels; values below 1 are replaced by 1.
//   BPP_x    - bytes per pixel; STRIDE_x - stride rounding granularity.
//   TERP     - interpolation value passed to ARGBInterpolate().
//   NEG      - sign token placed in front of the height argument.
//   OFF      - byte offset added to both source pointers.
// FMT_A and FMT_B are accepted but not referenced by the macro body.
#define TESTTERP(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, W1280, TERP, \
                 N, NEG, OFF)                                                 \
  TEST_F(LibYUVPlanarTest, ARGBInterpolate##TERP##N) {                        \
    const int kWidth = ((W1280) > 0) ? (W1280) : 1;                           \
    const int kHeight = benchmark_height_;                                    \
    const int kStrideA =                                                      \
        (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A;                \
    const int kStrideB =                                                      \
        (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B;                \
    const int kSrcBytes = kStrideA * kHeight;                                 \
    const int kDstBytes = kStrideB * kHeight;                                 \
    align_buffer_page_end(first_src, kSrcBytes + OFF);                        \
    align_buffer_page_end(second_src, kSrcBytes + OFF);                       \
    align_buffer_page_end(dst_reference, kDstBytes);                          \
    align_buffer_page_end(dst_optimized, kDstBytes);                          \
    for (int n = 0; n < kSrcBytes; ++n) {                                     \
      first_src[n + OFF] = (fastrand() & 0xff);                               \
      second_src[n + OFF] = (fastrand() & 0xff);                              \
    }                                                                         \
    MaskCpuFlags(disable_cpu_flags_);                                         \
    ARGBInterpolate(first_src + OFF, kStrideA, second_src + OFF, kStrideA,    \
                    dst_reference, kStrideB, kWidth, NEG kHeight, TERP);      \
    MaskCpuFlags(benchmark_cpu_info_);                                        \
    for (int pass = 0; pass < benchmark_iterations_; ++pass) {                \
      ARGBInterpolate(first_src + OFF, kStrideA, second_src + OFF, kStrideA,  \
                      dst_optimized, kStrideB, kWidth, NEG kHeight, TERP);    \
    }                                                                         \
    for (int n = 0; n < kDstBytes; ++n) {                                     \
      EXPECT_EQ(dst_reference[n], dst_optimized[n]);                          \
    }                                                                         \
    free_aligned_buffer_page_end(first_src);                                  \
    free_aligned_buffer_page_end(second_src);                                 \
    free_aligned_buffer_page_end(dst_reference);                              \
    free_aligned_buffer_page_end(dst_optimized);                              \
  }
1038
1039 #define TESTINTERPOLATE(TERP) \
1040 TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_ - 1, TERP, _Any, +, 0) \
1041 TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Unaligned, +, 1) \
1042 TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Invert, -, 0) \
1043 TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Opt, +, 0)
1044
1045 TESTINTERPOLATE(0)
1046 TESTINTERPOLATE(64)
1047 TESTINTERPOLATE(128)
1048 TESTINTERPOLATE(192)
1049 TESTINTERPOLATE(255)
1050
TestBlend(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1051 static int TestBlend(int width,
1052 int height,
1053 int benchmark_iterations,
1054 int disable_cpu_flags,
1055 int benchmark_cpu_info,
1056 int invert,
1057 int off) {
1058 if (width < 1) {
1059 width = 1;
1060 }
1061 const int kBpp = 4;
1062 const int kStride = width * kBpp;
1063 align_buffer_page_end(src_argb_a, kStride * height + off);
1064 align_buffer_page_end(src_argb_b, kStride * height + off);
1065 align_buffer_page_end(dst_argb_c, kStride * height);
1066 align_buffer_page_end(dst_argb_opt, kStride * height);
1067 for (int i = 0; i < kStride * height; ++i) {
1068 src_argb_a[i + off] = (fastrand() & 0xff);
1069 src_argb_b[i + off] = (fastrand() & 0xff);
1070 }
1071 ARGBAttenuate(src_argb_a + off, kStride, src_argb_a + off, kStride, width,
1072 height);
1073 ARGBAttenuate(src_argb_b + off, kStride, src_argb_b + off, kStride, width,
1074 height);
1075 memset(dst_argb_c, 255, kStride * height);
1076 memset(dst_argb_opt, 255, kStride * height);
1077
1078 MaskCpuFlags(disable_cpu_flags);
1079 ARGBBlend(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
1080 kStride, width, invert * height);
1081 MaskCpuFlags(benchmark_cpu_info);
1082 for (int i = 0; i < benchmark_iterations; ++i) {
1083 ARGBBlend(src_argb_a + off, kStride, src_argb_b + off, kStride,
1084 dst_argb_opt, kStride, width, invert * height);
1085 }
1086 int max_diff = 0;
1087 for (int i = 0; i < kStride * height; ++i) {
1088 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1089 static_cast<int>(dst_argb_opt[i]));
1090 if (abs_diff > max_diff) {
1091 max_diff = abs_diff;
1092 }
1093 }
1094 free_aligned_buffer_page_end(src_argb_a);
1095 free_aligned_buffer_page_end(src_argb_b);
1096 free_aligned_buffer_page_end(dst_argb_c);
1097 free_aligned_buffer_page_end(dst_argb_opt);
1098 return max_diff;
1099 }
1100
// ARGBBlend with the width reduced by 4 pixels; the two outputs compared by
// TestBlend() may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBBlend_Any) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOffset = 0;   // No byte offset on the source buffers.
  const int worst_diff =
      TestBlend(benchmark_width_ - 4, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(worst_diff, 1);
}
1107
// ARGBBlend with the source buffers shifted by one byte; the two outputs
// compared by TestBlend() may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBBlend_Unaligned) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOffset = 1;   // Sources start one byte into their buffers.
  const int worst_diff =
      TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(worst_diff, 1);
}
1114
// ARGBBlend called with a negated height; the two outputs compared by
// TestBlend() may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBBlend_Invert) {
  const int kInvert = -1;  // Height is negated before the call.
  const int kOffset = 0;   // No byte offset on the source buffers.
  const int worst_diff =
      TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(worst_diff, 1);
}
1121
// ARGBBlend at the full benchmark size with no offset; the two outputs
// compared by TestBlend() may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBBlend_Opt) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOffset = 0;   // No byte offset on the source buffers.
  const int worst_diff =
      TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(worst_diff, 1);
}
1128
TestBlendPlane(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1129 static void TestBlendPlane(int width,
1130 int height,
1131 int benchmark_iterations,
1132 int disable_cpu_flags,
1133 int benchmark_cpu_info,
1134 int invert,
1135 int off) {
1136 if (width < 1) {
1137 width = 1;
1138 }
1139 const int kBpp = 1;
1140 const int kStride = width * kBpp;
1141 align_buffer_page_end(src_argb_a, kStride * height + off);
1142 align_buffer_page_end(src_argb_b, kStride * height + off);
1143 align_buffer_page_end(src_argb_alpha, kStride * height + off);
1144 align_buffer_page_end(dst_argb_c, kStride * height + off);
1145 align_buffer_page_end(dst_argb_opt, kStride * height + off);
1146 memset(dst_argb_c, 255, kStride * height + off);
1147 memset(dst_argb_opt, 255, kStride * height + off);
1148
1149 // Test source is maintained exactly if alpha is 255.
1150 for (int i = 0; i < width; ++i) {
1151 src_argb_a[i + off] = i & 255;
1152 src_argb_b[i + off] = 255 - (i & 255);
1153 }
1154 memset(src_argb_alpha + off, 255, width);
1155 BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
1156 src_argb_alpha + off, width, dst_argb_opt + off, width, width, 1);
1157 for (int i = 0; i < width; ++i) {
1158 EXPECT_EQ(src_argb_a[i + off], dst_argb_opt[i + off]);
1159 }
1160 // Test destination is maintained exactly if alpha is 0.
1161 memset(src_argb_alpha + off, 0, width);
1162 BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
1163 src_argb_alpha + off, width, dst_argb_opt + off, width, width, 1);
1164 for (int i = 0; i < width; ++i) {
1165 EXPECT_EQ(src_argb_b[i + off], dst_argb_opt[i + off]);
1166 }
1167 for (int i = 0; i < kStride * height; ++i) {
1168 src_argb_a[i + off] = (fastrand() & 0xff);
1169 src_argb_b[i + off] = (fastrand() & 0xff);
1170 src_argb_alpha[i + off] = (fastrand() & 0xff);
1171 }
1172
1173 MaskCpuFlags(disable_cpu_flags);
1174 BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
1175 src_argb_alpha + off, width, dst_argb_c + off, width, width,
1176 invert * height);
1177 MaskCpuFlags(benchmark_cpu_info);
1178 for (int i = 0; i < benchmark_iterations; ++i) {
1179 BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
1180 src_argb_alpha + off, width, dst_argb_opt + off, width, width,
1181 invert * height);
1182 }
1183 for (int i = 0; i < kStride * height; ++i) {
1184 EXPECT_EQ(dst_argb_c[i + off], dst_argb_opt[i + off]);
1185 }
1186 free_aligned_buffer_page_end(src_argb_a);
1187 free_aligned_buffer_page_end(src_argb_b);
1188 free_aligned_buffer_page_end(src_argb_alpha);
1189 free_aligned_buffer_page_end(dst_argb_c);
1190 free_aligned_buffer_page_end(dst_argb_opt);
1191 return;
1192 }
1193
// BlendPlane at the full benchmark size with no buffer offset.
TEST_F(LibYUVPlanarTest, BlendPlane_Opt) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOffset = 0;   // Buffers are used from their first byte.
  TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
// BlendPlane at the full benchmark size with every buffer shifted by one byte.
TEST_F(LibYUVPlanarTest, BlendPlane_Unaligned) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOffset = 1;   // Buffers start one byte in.
  TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
// BlendPlane with the width reduced by 4 and every buffer shifted by one byte.
TEST_F(LibYUVPlanarTest, BlendPlane_Any) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOffset = 1;   // Buffers start one byte in.
  TestBlendPlane(benchmark_width_ - 4, benchmark_height_,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_, kInvert, kOffset);
}
// BlendPlane with a negated height and every buffer shifted by one byte.
TEST_F(LibYUVPlanarTest, BlendPlane_Invert) {
  const int kInvert = -1;  // Height is negated before the call.
  const int kOffset = 1;   // Buffers start one byte in.
  TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
1210
// Divides v by a, rounding up for non-negative integers.
#define SUBSAMPLE(v, a) (((v) + (a) - 1) / (a))
1212
TestI420Blend(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1213 static void TestI420Blend(int width,
1214 int height,
1215 int benchmark_iterations,
1216 int disable_cpu_flags,
1217 int benchmark_cpu_info,
1218 int invert,
1219 int off) {
1220 width = ((width) > 0) ? (width) : 1;
1221 const int kStrideUV = SUBSAMPLE(width, 2);
1222 const int kSizeUV = kStrideUV * SUBSAMPLE(height, 2);
1223 align_buffer_page_end(src_y0, width * height + off);
1224 align_buffer_page_end(src_u0, kSizeUV + off);
1225 align_buffer_page_end(src_v0, kSizeUV + off);
1226 align_buffer_page_end(src_y1, width * height + off);
1227 align_buffer_page_end(src_u1, kSizeUV + off);
1228 align_buffer_page_end(src_v1, kSizeUV + off);
1229 align_buffer_page_end(src_a, width * height + off);
1230 align_buffer_page_end(dst_y_c, width * height + off);
1231 align_buffer_page_end(dst_u_c, kSizeUV + off);
1232 align_buffer_page_end(dst_v_c, kSizeUV + off);
1233 align_buffer_page_end(dst_y_opt, width * height + off);
1234 align_buffer_page_end(dst_u_opt, kSizeUV + off);
1235 align_buffer_page_end(dst_v_opt, kSizeUV + off);
1236
1237 MemRandomize(src_y0, width * height + off);
1238 MemRandomize(src_u0, kSizeUV + off);
1239 MemRandomize(src_v0, kSizeUV + off);
1240 MemRandomize(src_y1, width * height + off);
1241 MemRandomize(src_u1, kSizeUV + off);
1242 MemRandomize(src_v1, kSizeUV + off);
1243 MemRandomize(src_a, width * height + off);
1244 memset(dst_y_c, 255, width * height + off);
1245 memset(dst_u_c, 255, kSizeUV + off);
1246 memset(dst_v_c, 255, kSizeUV + off);
1247 memset(dst_y_opt, 255, width * height + off);
1248 memset(dst_u_opt, 255, kSizeUV + off);
1249 memset(dst_v_opt, 255, kSizeUV + off);
1250
1251 MaskCpuFlags(disable_cpu_flags);
1252 I420Blend(src_y0 + off, width, src_u0 + off, kStrideUV, src_v0 + off,
1253 kStrideUV, src_y1 + off, width, src_u1 + off, kStrideUV,
1254 src_v1 + off, kStrideUV, src_a + off, width, dst_y_c + off, width,
1255 dst_u_c + off, kStrideUV, dst_v_c + off, kStrideUV, width,
1256 invert * height);
1257 MaskCpuFlags(benchmark_cpu_info);
1258 for (int i = 0; i < benchmark_iterations; ++i) {
1259 I420Blend(src_y0 + off, width, src_u0 + off, kStrideUV, src_v0 + off,
1260 kStrideUV, src_y1 + off, width, src_u1 + off, kStrideUV,
1261 src_v1 + off, kStrideUV, src_a + off, width, dst_y_opt + off,
1262 width, dst_u_opt + off, kStrideUV, dst_v_opt + off, kStrideUV,
1263 width, invert * height);
1264 }
1265 for (int i = 0; i < width * height; ++i) {
1266 EXPECT_EQ(dst_y_c[i + off], dst_y_opt[i + off]);
1267 }
1268 for (int i = 0; i < kSizeUV; ++i) {
1269 EXPECT_EQ(dst_u_c[i + off], dst_u_opt[i + off]);
1270 EXPECT_EQ(dst_v_c[i + off], dst_v_opt[i + off]);
1271 }
1272 free_aligned_buffer_page_end(src_y0);
1273 free_aligned_buffer_page_end(src_u0);
1274 free_aligned_buffer_page_end(src_v0);
1275 free_aligned_buffer_page_end(src_y1);
1276 free_aligned_buffer_page_end(src_u1);
1277 free_aligned_buffer_page_end(src_v1);
1278 free_aligned_buffer_page_end(src_a);
1279 free_aligned_buffer_page_end(dst_y_c);
1280 free_aligned_buffer_page_end(dst_u_c);
1281 free_aligned_buffer_page_end(dst_v_c);
1282 free_aligned_buffer_page_end(dst_y_opt);
1283 free_aligned_buffer_page_end(dst_u_opt);
1284 free_aligned_buffer_page_end(dst_v_opt);
1285 return;
1286 }
1287
// I420Blend at the full benchmark size with no buffer offset.
TEST_F(LibYUVPlanarTest, I420Blend_Opt) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOffset = 0;   // Buffers are used from their first byte.
  TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
// I420Blend at the full benchmark size with every buffer shifted by one byte.
TEST_F(LibYUVPlanarTest, I420Blend_Unaligned) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOffset = 1;   // Buffers start one byte in.
  TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
1296
1297 // TODO(fbarchard): DISABLED because _Any uses C. Avoid C and re-enable.
TEST_F(LibYUVPlanarTest,DISABLED_I420Blend_Any)1298 TEST_F(LibYUVPlanarTest, DISABLED_I420Blend_Any) {
1299 TestI420Blend(benchmark_width_ - 4, benchmark_height_, benchmark_iterations_,
1300 disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
1301 }
// I420Blend with a negated height and no buffer offset.
TEST_F(LibYUVPlanarTest, I420Blend_Invert) {
  const int kInvert = -1;  // Height is negated before the call.
  const int kOffset = 0;   // Buffers are used from their first byte.
  TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
1306
// Checks ARGBAffineRow_C() against three known output pixels and, when the
// SSE2 row function is compiled in and the CPU reports SSE2, checks that
// ARGBAffineRow_SSE2() produces the same 1280 pixels and benchmarks it.
TEST_F(LibYUVPlanarTest, TestAffine) {
  SIMD_ALIGNED(uint8 orig_pixels_0[1280][4]);
  SIMD_ALIGNED(uint8 interpolate_pixels_C[1280][4]);

  // Every channel of source pixel i holds the low 8 bits of i.
  for (int i = 0; i < 1280; ++i) {
    for (int j = 0; j < 4; ++j) {
      orig_pixels_0[i][j] = static_cast<uint8>(i);
    }
  }

  // NOTE(review): the expected values below (96 at pixel 128, 191 at pixel
  // 255) are consistent with a 0.75 source step per output pixel; confirm the
  // exact element layout against the ARGBAffineRow documentation.
  float uv_step[4] = {0.f, 0.f, 0.75f, 0.f};

  ARGBAffineRow_C(&orig_pixels_0[0][0], 0, &interpolate_pixels_C[0][0], uv_step,
                  1280);
  EXPECT_EQ(0u, interpolate_pixels_C[0][0]);
  EXPECT_EQ(96u, interpolate_pixels_C[128][0]);
  EXPECT_EQ(191u, interpolate_pixels_C[255][3]);

#if defined(HAS_ARGBAFFINEROW_SSE2)
  // The compile-time guard only says the SSE2 row exists in this build. The
  // runtime check must wrap every call to it, not just the benchmark loop, so
  // that it is never executed on a CPU without SSE2.
  if (TestCpuFlag(kCpuHasSSE2)) {
    SIMD_ALIGNED(uint8 interpolate_pixels_Opt[1280][4]);
    ARGBAffineRow_SSE2(&orig_pixels_0[0][0], 0, &interpolate_pixels_Opt[0][0],
                       uv_step, 1280);
    EXPECT_EQ(0,
              memcmp(interpolate_pixels_Opt, interpolate_pixels_C, 1280 * 4));

    for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
      ARGBAffineRow_SSE2(&orig_pixels_0[0][0], 0, &interpolate_pixels_Opt[0][0],
                         uv_step, 1280);
    }
  }
#endif
}
1340
// Runs CopyPlane() benchmark_iterations_ times with disable_cpu_flags_ and
// again with benchmark_cpu_info_, copying the interior of a bordered random
// source into two identically seeded destinations, and expects both
// destinations to end up identical over the whole plane.
TEST_F(LibYUVPlanarTest, TestCopyPlane) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kBorder = 12;
  const int kSrcStride = kWidth + kBorder * 2;
  const int kPlaneBytes = kSrcStride * (kHeight + kBorder * 2);

  align_buffer_page_end(src_plane, kPlaneBytes);
  align_buffer_page_end(out_reference, kPlaneBytes);
  align_buffer_page_end(out_optimized, kPlaneBytes);
  memset(src_plane, 0, kPlaneBytes);
  memset(out_reference, 0, kPlaneBytes);
  memset(out_optimized, 0, kPlaneBytes);

  // Random data inside the border of the source; the border itself stays 0.
  for (int row = kBorder; row < (kHeight + kBorder); ++row) {
    for (int col = kBorder; col < (kWidth + kBorder); ++col) {
      src_plane[row * kSrcStride + col] = fastrand() & 0xff;
    }
  }

  // Both destinations start with the same random contents.
  for (int n = 0; n < kPlaneBytes; ++n) {
    const uint8 seed_byte = fastrand() & 0x7f;
    out_reference[n] = seed_byte;
    out_optimized[n] = seed_byte;
  }

  // Offset of the first byte inside the border.
  const int kInteriorOffset = kBorder * kSrcStride + kBorder;
  // NOTE(review): the destination stride is 8 regardless of the copied width,
  // so destination rows overlap whenever the width exceeds 8. Presumably this
  // is intentional; confirm before changing.
  const int kDstStride = 8;

  // Reference run with optimizations disabled.
  MaskCpuFlags(disable_cpu_flags_);
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    CopyPlane(src_plane + kInteriorOffset, kSrcStride,
              out_reference + kInteriorOffset, kDstStride, kWidth, kHeight);
  }

  // Benchmarked run with optimizations enabled.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    CopyPlane(src_plane + kInteriorOffset, kSrcStride,
              out_optimized + kInteriorOffset, kDstStride, kWidth, kHeight);
  }

  int mismatches = 0;
  for (int n = 0; n < kPlaneBytes; ++n) {
    if (out_reference[n] != out_optimized[n]) {
      ++mismatches;
    }
  }

  free_aligned_buffer_page_end(src_plane);
  free_aligned_buffer_page_end(out_reference);
  free_aligned_buffer_page_end(out_optimized);

  EXPECT_EQ(0, mismatches);
}
1399
TestMultiply(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1400 static int TestMultiply(int width,
1401 int height,
1402 int benchmark_iterations,
1403 int disable_cpu_flags,
1404 int benchmark_cpu_info,
1405 int invert,
1406 int off) {
1407 if (width < 1) {
1408 width = 1;
1409 }
1410 const int kBpp = 4;
1411 const int kStride = width * kBpp;
1412 align_buffer_page_end(src_argb_a, kStride * height + off);
1413 align_buffer_page_end(src_argb_b, kStride * height + off);
1414 align_buffer_page_end(dst_argb_c, kStride * height);
1415 align_buffer_page_end(dst_argb_opt, kStride * height);
1416 for (int i = 0; i < kStride * height; ++i) {
1417 src_argb_a[i + off] = (fastrand() & 0xff);
1418 src_argb_b[i + off] = (fastrand() & 0xff);
1419 }
1420 memset(dst_argb_c, 0, kStride * height);
1421 memset(dst_argb_opt, 0, kStride * height);
1422
1423 MaskCpuFlags(disable_cpu_flags);
1424 ARGBMultiply(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
1425 kStride, width, invert * height);
1426 MaskCpuFlags(benchmark_cpu_info);
1427 for (int i = 0; i < benchmark_iterations; ++i) {
1428 ARGBMultiply(src_argb_a + off, kStride, src_argb_b + off, kStride,
1429 dst_argb_opt, kStride, width, invert * height);
1430 }
1431 int max_diff = 0;
1432 for (int i = 0; i < kStride * height; ++i) {
1433 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1434 static_cast<int>(dst_argb_opt[i]));
1435 if (abs_diff > max_diff) {
1436 max_diff = abs_diff;
1437 }
1438 }
1439 free_aligned_buffer_page_end(src_argb_a);
1440 free_aligned_buffer_page_end(src_argb_b);
1441 free_aligned_buffer_page_end(dst_argb_c);
1442 free_aligned_buffer_page_end(dst_argb_opt);
1443 return max_diff;
1444 }
1445
// ARGBMultiply with the width reduced by 1 pixel; the two outputs compared by
// TestMultiply() may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBMultiply_Any) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOffset = 0;   // No byte offset on the source buffers.
  const int worst_diff = TestMultiply(
      benchmark_width_ - 1, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(worst_diff, 1);
}
1452
// ARGBMultiply with the source buffers shifted by one byte; the two outputs
// compared by TestMultiply() may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBMultiply_Unaligned) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOffset = 1;   // Sources start one byte into their buffers.
  const int worst_diff =
      TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(worst_diff, 1);
}
1459
// ARGBMultiply called with a negated height; the two outputs compared by
// TestMultiply() may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBMultiply_Invert) {
  const int kInvert = -1;  // Height is negated before the call.
  const int kOffset = 0;   // No byte offset on the source buffers.
  const int worst_diff =
      TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(worst_diff, 1);
}
1466
// ARGBMultiply at the full benchmark size with no offset; the two outputs
// compared by TestMultiply() may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBMultiply_Opt) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOffset = 0;   // No byte offset on the source buffers.
  const int worst_diff =
      TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(worst_diff, 1);
}
1473
TestAdd(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1474 static int TestAdd(int width,
1475 int height,
1476 int benchmark_iterations,
1477 int disable_cpu_flags,
1478 int benchmark_cpu_info,
1479 int invert,
1480 int off) {
1481 if (width < 1) {
1482 width = 1;
1483 }
1484 const int kBpp = 4;
1485 const int kStride = width * kBpp;
1486 align_buffer_page_end(src_argb_a, kStride * height + off);
1487 align_buffer_page_end(src_argb_b, kStride * height + off);
1488 align_buffer_page_end(dst_argb_c, kStride * height);
1489 align_buffer_page_end(dst_argb_opt, kStride * height);
1490 for (int i = 0; i < kStride * height; ++i) {
1491 src_argb_a[i + off] = (fastrand() & 0xff);
1492 src_argb_b[i + off] = (fastrand() & 0xff);
1493 }
1494 memset(dst_argb_c, 0, kStride * height);
1495 memset(dst_argb_opt, 0, kStride * height);
1496
1497 MaskCpuFlags(disable_cpu_flags);
1498 ARGBAdd(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
1499 kStride, width, invert * height);
1500 MaskCpuFlags(benchmark_cpu_info);
1501 for (int i = 0; i < benchmark_iterations; ++i) {
1502 ARGBAdd(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_opt,
1503 kStride, width, invert * height);
1504 }
1505 int max_diff = 0;
1506 for (int i = 0; i < kStride * height; ++i) {
1507 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1508 static_cast<int>(dst_argb_opt[i]));
1509 if (abs_diff > max_diff) {
1510 max_diff = abs_diff;
1511 }
1512 }
1513 free_aligned_buffer_page_end(src_argb_a);
1514 free_aligned_buffer_page_end(src_argb_b);
1515 free_aligned_buffer_page_end(dst_argb_c);
1516 free_aligned_buffer_page_end(dst_argb_opt);
1517 return max_diff;
1518 }
1519
// ARGBAdd with the width reduced by 1 pixel; the two outputs compared by
// TestAdd() may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBAdd_Any) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOffset = 0;   // No byte offset on the source buffers.
  const int worst_diff =
      TestAdd(benchmark_width_ - 1, benchmark_height_, benchmark_iterations_,
              disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(worst_diff, 1);
}
1526
// ARGBAdd with the source buffers shifted by one byte; the two outputs
// compared by TestAdd() may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBAdd_Unaligned) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOffset = 1;   // Sources start one byte into their buffers.
  const int worst_diff =
      TestAdd(benchmark_width_, benchmark_height_, benchmark_iterations_,
              disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(worst_diff, 1);
}
1533
// ARGBAdd called with a negated height; the two outputs compared by TestAdd()
// may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBAdd_Invert) {
  const int kInvert = -1;  // Height is negated before the call.
  const int kOffset = 0;   // No byte offset on the source buffers.
  const int worst_diff =
      TestAdd(benchmark_width_, benchmark_height_, benchmark_iterations_,
              disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(worst_diff, 1);
}
1540
// ARGBAdd at the full benchmark size with no offset; the two outputs compared
// by TestAdd() may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBAdd_Opt) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOffset = 0;   // No byte offset on the source buffers.
  const int worst_diff =
      TestAdd(benchmark_width_, benchmark_height_, benchmark_iterations_,
              disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(worst_diff, 1);
}
1547
TestSubtract(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1548 static int TestSubtract(int width,
1549 int height,
1550 int benchmark_iterations,
1551 int disable_cpu_flags,
1552 int benchmark_cpu_info,
1553 int invert,
1554 int off) {
1555 if (width < 1) {
1556 width = 1;
1557 }
1558 const int kBpp = 4;
1559 const int kStride = width * kBpp;
1560 align_buffer_page_end(src_argb_a, kStride * height + off);
1561 align_buffer_page_end(src_argb_b, kStride * height + off);
1562 align_buffer_page_end(dst_argb_c, kStride * height);
1563 align_buffer_page_end(dst_argb_opt, kStride * height);
1564 for (int i = 0; i < kStride * height; ++i) {
1565 src_argb_a[i + off] = (fastrand() & 0xff);
1566 src_argb_b[i + off] = (fastrand() & 0xff);
1567 }
1568 memset(dst_argb_c, 0, kStride * height);
1569 memset(dst_argb_opt, 0, kStride * height);
1570
1571 MaskCpuFlags(disable_cpu_flags);
1572 ARGBSubtract(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
1573 kStride, width, invert * height);
1574 MaskCpuFlags(benchmark_cpu_info);
1575 for (int i = 0; i < benchmark_iterations; ++i) {
1576 ARGBSubtract(src_argb_a + off, kStride, src_argb_b + off, kStride,
1577 dst_argb_opt, kStride, width, invert * height);
1578 }
1579 int max_diff = 0;
1580 for (int i = 0; i < kStride * height; ++i) {
1581 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1582 static_cast<int>(dst_argb_opt[i]));
1583 if (abs_diff > max_diff) {
1584 max_diff = abs_diff;
1585 }
1586 }
1587 free_aligned_buffer_page_end(src_argb_a);
1588 free_aligned_buffer_page_end(src_argb_b);
1589 free_aligned_buffer_page_end(dst_argb_c);
1590 free_aligned_buffer_page_end(dst_argb_opt);
1591 return max_diff;
1592 }
1593
// ARGBSubtract with the width reduced by 1 pixel; the two outputs compared by
// TestSubtract() may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBSubtract_Any) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOffset = 0;   // No byte offset on the source buffers.
  const int worst_diff = TestSubtract(
      benchmark_width_ - 1, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(worst_diff, 1);
}
1600
// ARGBSubtract with the source buffers shifted by one byte; the two outputs
// compared by TestSubtract() may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBSubtract_Unaligned) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOffset = 1;   // Sources start one byte into their buffers.
  const int worst_diff =
      TestSubtract(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(worst_diff, 1);
}
1607
// ARGBSubtract called with a negated height; the two outputs compared by
// TestSubtract() may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBSubtract_Invert) {
  const int kInvert = -1;  // Height is negated before the call.
  const int kOffset = 0;   // No byte offset on the source buffers.
  const int worst_diff =
      TestSubtract(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(worst_diff, 1);
}
1614
// ARGBSubtract at the full benchmark size with no offset; the two outputs
// compared by TestSubtract() may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBSubtract_Opt) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOffset = 0;   // No byte offset on the source buffers.
  const int worst_diff =
      TestSubtract(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(worst_diff, 1);
}
1621
TestSobel(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1622 static int TestSobel(int width,
1623 int height,
1624 int benchmark_iterations,
1625 int disable_cpu_flags,
1626 int benchmark_cpu_info,
1627 int invert,
1628 int off) {
1629 if (width < 1) {
1630 width = 1;
1631 }
1632 const int kBpp = 4;
1633 const int kStride = width * kBpp;
1634 align_buffer_page_end(src_argb_a, kStride * height + off);
1635 align_buffer_page_end(dst_argb_c, kStride * height);
1636 align_buffer_page_end(dst_argb_opt, kStride * height);
1637 memset(src_argb_a, 0, kStride * height + off);
1638 for (int i = 0; i < kStride * height; ++i) {
1639 src_argb_a[i + off] = (fastrand() & 0xff);
1640 }
1641 memset(dst_argb_c, 0, kStride * height);
1642 memset(dst_argb_opt, 0, kStride * height);
1643
1644 MaskCpuFlags(disable_cpu_flags);
1645 ARGBSobel(src_argb_a + off, kStride, dst_argb_c, kStride, width,
1646 invert * height);
1647 MaskCpuFlags(benchmark_cpu_info);
1648 for (int i = 0; i < benchmark_iterations; ++i) {
1649 ARGBSobel(src_argb_a + off, kStride, dst_argb_opt, kStride, width,
1650 invert * height);
1651 }
1652 int max_diff = 0;
1653 for (int i = 0; i < kStride * height; ++i) {
1654 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1655 static_cast<int>(dst_argb_opt[i]));
1656 if (abs_diff > max_diff) {
1657 max_diff = abs_diff;
1658 }
1659 }
1660 free_aligned_buffer_page_end(src_argb_a);
1661 free_aligned_buffer_page_end(dst_argb_c);
1662 free_aligned_buffer_page_end(dst_argb_opt);
1663 return max_diff;
1664 }
1665
// ARGBSobel with the width reduced by 1 pixel; the two outputs compared by
// TestSobel() must be identical.
TEST_F(LibYUVPlanarTest, ARGBSobel_Any) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOffset = 0;   // No byte offset on the source buffer.
  const int worst_diff =
      TestSobel(benchmark_width_ - 1, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(0, worst_diff);
}
1672
// ARGBSobel with the source buffer shifted by one byte; the two outputs
// compared by TestSobel() must be identical.
TEST_F(LibYUVPlanarTest, ARGBSobel_Unaligned) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOffset = 1;   // Source starts one byte into its buffer.
  const int worst_diff =
      TestSobel(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(0, worst_diff);
}
1679
// ARGBSobel called with a negated height; the two outputs compared by
// TestSobel() must be identical.
TEST_F(LibYUVPlanarTest, ARGBSobel_Invert) {
  const int kInvert = -1;  // Height is negated before the call.
  const int kOffset = 0;   // No byte offset on the source buffer.
  const int worst_diff =
      TestSobel(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(0, worst_diff);
}
1686
// ARGBSobel at the full benchmark size with no offset; the two outputs
// compared by TestSobel() must be identical.
TEST_F(LibYUVPlanarTest, ARGBSobel_Opt) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOffset = 0;   // No byte offset on the source buffer.
  const int worst_diff =
      TestSobel(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(0, worst_diff);
}
1693
TestSobelToPlane(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1694 static int TestSobelToPlane(int width,
1695 int height,
1696 int benchmark_iterations,
1697 int disable_cpu_flags,
1698 int benchmark_cpu_info,
1699 int invert,
1700 int off) {
1701 if (width < 1) {
1702 width = 1;
1703 }
1704 const int kSrcBpp = 4;
1705 const int kDstBpp = 1;
1706 const int kSrcStride = (width * kSrcBpp + 15) & ~15;
1707 const int kDstStride = (width * kDstBpp + 15) & ~15;
1708 align_buffer_page_end(src_argb_a, kSrcStride * height + off);
1709 align_buffer_page_end(dst_argb_c, kDstStride * height);
1710 align_buffer_page_end(dst_argb_opt, kDstStride * height);
1711 memset(src_argb_a, 0, kSrcStride * height + off);
1712 for (int i = 0; i < kSrcStride * height; ++i) {
1713 src_argb_a[i + off] = (fastrand() & 0xff);
1714 }
1715 memset(dst_argb_c, 0, kDstStride * height);
1716 memset(dst_argb_opt, 0, kDstStride * height);
1717
1718 MaskCpuFlags(disable_cpu_flags);
1719 ARGBSobelToPlane(src_argb_a + off, kSrcStride, dst_argb_c, kDstStride, width,
1720 invert * height);
1721 MaskCpuFlags(benchmark_cpu_info);
1722 for (int i = 0; i < benchmark_iterations; ++i) {
1723 ARGBSobelToPlane(src_argb_a + off, kSrcStride, dst_argb_opt, kDstStride,
1724 width, invert * height);
1725 }
1726 int max_diff = 0;
1727 for (int i = 0; i < kDstStride * height; ++i) {
1728 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1729 static_cast<int>(dst_argb_opt[i]));
1730 if (abs_diff > max_diff) {
1731 max_diff = abs_diff;
1732 }
1733 }
1734 free_aligned_buffer_page_end(src_argb_a);
1735 free_aligned_buffer_page_end(dst_argb_c);
1736 free_aligned_buffer_page_end(dst_argb_opt);
1737 return max_diff;
1738 }
1739
// ARGBSobelToPlane with a width one less than the benchmark width, normal
// orientation (invert = +1) and no source offset.
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Any) {
  const int kWidth = benchmark_width_ - 1;
  const int kInvert = +1;
  const int kOffset = 0;
  const int worst_delta = TestSobelToPlane(
      kWidth, benchmark_height_, benchmark_iterations_, disable_cpu_flags_,
      benchmark_cpu_info_, kInvert, kOffset);
  // Both outputs must be byte-identical.
  EXPECT_EQ(0, worst_delta);
}
1746
// ARGBSobelToPlane at full benchmark size with the source pointer advanced by
// one byte (off = 1).
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int worst_delta = TestSobelToPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  // Both outputs must be byte-identical.
  EXPECT_EQ(0, worst_delta);
}
1753
// ARGBSobelToPlane at full benchmark size with invert = -1, so the helper
// passes a negated height to the library.
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int worst_delta = TestSobelToPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  // Both outputs must be byte-identical.
  EXPECT_EQ(0, worst_delta);
}
1760
// ARGBSobelToPlane at full benchmark size, normal orientation, no offset.
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int worst_delta = TestSobelToPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  // Both outputs must be byte-identical.
  EXPECT_EQ(0, worst_delta);
}
1767
TestSobelXY(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1768 static int TestSobelXY(int width,
1769 int height,
1770 int benchmark_iterations,
1771 int disable_cpu_flags,
1772 int benchmark_cpu_info,
1773 int invert,
1774 int off) {
1775 if (width < 1) {
1776 width = 1;
1777 }
1778 const int kBpp = 4;
1779 const int kStride = width * kBpp;
1780 align_buffer_page_end(src_argb_a, kStride * height + off);
1781 align_buffer_page_end(dst_argb_c, kStride * height);
1782 align_buffer_page_end(dst_argb_opt, kStride * height);
1783 memset(src_argb_a, 0, kStride * height + off);
1784 for (int i = 0; i < kStride * height; ++i) {
1785 src_argb_a[i + off] = (fastrand() & 0xff);
1786 }
1787 memset(dst_argb_c, 0, kStride * height);
1788 memset(dst_argb_opt, 0, kStride * height);
1789
1790 MaskCpuFlags(disable_cpu_flags);
1791 ARGBSobelXY(src_argb_a + off, kStride, dst_argb_c, kStride, width,
1792 invert * height);
1793 MaskCpuFlags(benchmark_cpu_info);
1794 for (int i = 0; i < benchmark_iterations; ++i) {
1795 ARGBSobelXY(src_argb_a + off, kStride, dst_argb_opt, kStride, width,
1796 invert * height);
1797 }
1798 int max_diff = 0;
1799 for (int i = 0; i < kStride * height; ++i) {
1800 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1801 static_cast<int>(dst_argb_opt[i]));
1802 if (abs_diff > max_diff) {
1803 max_diff = abs_diff;
1804 }
1805 }
1806 free_aligned_buffer_page_end(src_argb_a);
1807 free_aligned_buffer_page_end(dst_argb_c);
1808 free_aligned_buffer_page_end(dst_argb_opt);
1809 return max_diff;
1810 }
1811
// ARGBSobelXY with a width one less than the benchmark width, normal
// orientation (invert = +1) and no source offset.
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Any) {
  const int kWidth = benchmark_width_ - 1;
  const int kInvert = +1;
  const int kOffset = 0;
  const int worst_delta =
      TestSobelXY(kWidth, benchmark_height_, benchmark_iterations_,
                  disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  // Both outputs must be byte-identical.
  EXPECT_EQ(0, worst_delta);
}
1818
// ARGBSobelXY at full benchmark size with the source pointer advanced by one
// byte (off = 1).
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int worst_delta =
      TestSobelXY(benchmark_width_, benchmark_height_, benchmark_iterations_,
                  disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  // Both outputs must be byte-identical.
  EXPECT_EQ(0, worst_delta);
}
1825
// ARGBSobelXY at full benchmark size with invert = -1, so the helper passes a
// negated height to the library.
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int worst_delta =
      TestSobelXY(benchmark_width_, benchmark_height_, benchmark_iterations_,
                  disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  // Both outputs must be byte-identical.
  EXPECT_EQ(0, worst_delta);
}
1832
// ARGBSobelXY at full benchmark size, normal orientation, no offset.
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int worst_delta =
      TestSobelXY(benchmark_width_, benchmark_height_, benchmark_iterations_,
                  disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  // Both outputs must be byte-identical.
  EXPECT_EQ(0, worst_delta);
}
1839
TestBlur(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off,int radius)1840 static int TestBlur(int width,
1841 int height,
1842 int benchmark_iterations,
1843 int disable_cpu_flags,
1844 int benchmark_cpu_info,
1845 int invert,
1846 int off,
1847 int radius) {
1848 if (width < 1) {
1849 width = 1;
1850 }
1851 const int kBpp = 4;
1852 const int kStride = width * kBpp;
1853 align_buffer_page_end(src_argb_a, kStride * height + off);
1854 align_buffer_page_end(dst_cumsum, width * height * 16);
1855 align_buffer_page_end(dst_argb_c, kStride * height);
1856 align_buffer_page_end(dst_argb_opt, kStride * height);
1857 for (int i = 0; i < kStride * height; ++i) {
1858 src_argb_a[i + off] = (fastrand() & 0xff);
1859 }
1860 memset(dst_cumsum, 0, width * height * 16);
1861 memset(dst_argb_c, 0, kStride * height);
1862 memset(dst_argb_opt, 0, kStride * height);
1863
1864 MaskCpuFlags(disable_cpu_flags);
1865 ARGBBlur(src_argb_a + off, kStride, dst_argb_c, kStride,
1866 reinterpret_cast<int32*>(dst_cumsum), width * 4, width,
1867 invert * height, radius);
1868 MaskCpuFlags(benchmark_cpu_info);
1869 for (int i = 0; i < benchmark_iterations; ++i) {
1870 ARGBBlur(src_argb_a + off, kStride, dst_argb_opt, kStride,
1871 reinterpret_cast<int32*>(dst_cumsum), width * 4, width,
1872 invert * height, radius);
1873 }
1874 int max_diff = 0;
1875 for (int i = 0; i < kStride * height; ++i) {
1876 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1877 static_cast<int>(dst_argb_opt[i]));
1878 if (abs_diff > max_diff) {
1879 max_diff = abs_diff;
1880 }
1881 }
1882 free_aligned_buffer_page_end(src_argb_a);
1883 free_aligned_buffer_page_end(dst_cumsum);
1884 free_aligned_buffer_page_end(dst_argb_c);
1885 free_aligned_buffer_page_end(dst_argb_opt);
1886 return max_diff;
1887 }
1888
// Radius handed to TestBlur by the ARGBBlur_* tests.
static const int kBlurSize = 55;

// ARGBBlur with a width one less than the benchmark width, normal orientation
// and no source offset. A difference of at most 1 per byte is tolerated.
TEST_F(LibYUVPlanarTest, ARGBBlur_Any) {
  const int kWidth = benchmark_width_ - 1;
  const int kInvert = +1;
  const int kOffset = 0;
  const int worst_delta =
      TestBlur(kWidth, benchmark_height_, benchmark_iterations_,
               disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset,
               kBlurSize);
  EXPECT_LE(worst_delta, 1);
}
1896
// ARGBBlur (radius kBlurSize) at full benchmark size with the source pointer
// advanced by one byte. A difference of at most 1 per byte is tolerated.
TEST_F(LibYUVPlanarTest, ARGBBlur_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int worst_delta =
      TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
               disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset,
               kBlurSize);
  EXPECT_LE(worst_delta, 1);
}
1903
// ARGBBlur (radius kBlurSize) at full benchmark size with invert = -1, so the
// helper passes a negated height. A difference of at most 1 is tolerated.
TEST_F(LibYUVPlanarTest, ARGBBlur_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int worst_delta =
      TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
               disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset,
               kBlurSize);
  EXPECT_LE(worst_delta, 1);
}
1910
// ARGBBlur (radius kBlurSize) at full benchmark size, normal orientation, no
// offset. A difference of at most 1 per byte is tolerated.
TEST_F(LibYUVPlanarTest, ARGBBlur_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int worst_delta =
      TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
               disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset,
               kBlurSize);
  EXPECT_LE(worst_delta, 1);
}
1917
// Radius handed to TestBlur by the ARGBBlurSmall_* tests.
static const int kBlurSmallSize = 5;

// ARGBBlur with the small radius and a width one less than the benchmark
// width. A difference of at most 1 per byte is tolerated.
TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Any) {
  const int kWidth = benchmark_width_ - 1;
  const int kInvert = +1;
  const int kOffset = 0;
  const int worst_delta =
      TestBlur(kWidth, benchmark_height_, benchmark_iterations_,
               disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset,
               kBlurSmallSize);
  EXPECT_LE(worst_delta, 1);
}
1925
// ARGBBlur (radius kBlurSmallSize) at full benchmark size with the source
// pointer advanced by one byte. A difference of at most 1 is tolerated.
TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int worst_delta =
      TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
               disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset,
               kBlurSmallSize);
  EXPECT_LE(worst_delta, 1);
}
1932
// ARGBBlur (radius kBlurSmallSize) at full benchmark size with invert = -1, so
// the helper passes a negated height. A difference of at most 1 is tolerated.
TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int worst_delta =
      TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
               disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset,
               kBlurSmallSize);
  EXPECT_LE(worst_delta, 1);
}
1939
// ARGBBlur (radius kBlurSmallSize) at full benchmark size, normal orientation,
// no offset. A difference of at most 1 per byte is tolerated.
TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int worst_delta =
      TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
               disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset,
               kBlurSmallSize);
  EXPECT_LE(worst_delta, 1);
}
1946
// Exercises ARGBPolynomial with the "warmify" coefficient table in two stages:
//   1. five hand-picked pixels are checked against hard-coded results;
//   2. a 1280 pixel ramp is processed once after
//      MaskCpuFlags(disable_cpu_flags_) and benchmark_pixels_div1280_ times
//      after MaskCpuFlags(benchmark_cpu_info_), and the two outputs must be
//      identical in every channel.
TEST_F(LibYUVPlanarTest, TestARGBPolynomial) {
  SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
  SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]);
  SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  SIMD_ALIGNED(static const float kWarmifyPolynomial[16]) = {
      0.94230f,  -3.03300f,    -2.92500f,    0.f,  // C0
      0.584500f, 1.112000f,    1.535000f,    1.f,  // C1 x
      0.001313f, -0.002503f,   -0.004496f,   0.f,  // C2 x * x
      0.0f,      0.000006965f, 0.000008781f, 0.f,  // C3 x * x * x
  };

  // Stage 1 inputs, one row per pixel.
  static const uint8 kProbePixels[5][4] = {
      {255u, 0u, 0u, 128u},      // Test blue
      {0u, 255u, 0u, 0u},        // Test green
      {0u, 0u, 255u, 255u},      // Test red
      {255u, 255u, 255u, 255u},  // Test white
      {16u, 64u, 192u, 224u},    // Test color
  };
  // Stage 1 expected outputs, same layout as kProbePixels.
  static const unsigned int kExpected[5][4] = {
      {235u, 0u, 0u, 128u},
      {0u, 233u, 0u, 0u},
      {0u, 0u, 241u, 255u},
      {235u, 233u, 241u, 255u},
      {10u, 59u, 188u, 224u},
  };
  memcpy(&orig_pixels[0][0], &kProbePixels[0][0], sizeof(kProbePixels));

  // Do 16 to test asm version.
  ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
                 &kWarmifyPolynomial[0], 16, 1);
  for (int px = 0; px < 5; ++px) {
    for (int ch = 0; ch < 4; ++ch) {
      EXPECT_EQ(kExpected[px][ch], dst_pixels_opt[px][ch]);
    }
  }

  // Stage 2 input: a ramp, with each channel advancing at a different rate.
  for (int px = 0; px < 1280; ++px) {
    uint8* const pixel = orig_pixels[px];
    pixel[0] = px;
    pixel[1] = px / 2;
    pixel[2] = px / 3;
    pixel[3] = px;
  }

  // Reference output.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0,
                 &kWarmifyPolynomial[0], 1280, 1);

  // Output under test, repeated for benchmarking.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int pass = 0; pass < benchmark_pixels_div1280_; ++pass) {
    ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
                   &kWarmifyPolynomial[0], 1280, 1);
  }

  for (int px = 0; px < 1280; ++px) {
    for (int ch = 0; ch < 4; ++ch) {
      EXPECT_EQ(dst_pixels_c[px][ch], dst_pixels_opt[px][ch]);
    }
  }
}
2033
TestHalfFloatPlane(int benchmark_width,int benchmark_height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,float scale,int mask)2034 int TestHalfFloatPlane(int benchmark_width,
2035 int benchmark_height,
2036 int benchmark_iterations,
2037 int disable_cpu_flags,
2038 int benchmark_cpu_info,
2039 float scale,
2040 int mask) {
2041 int i, j;
2042 const int y_plane_size = benchmark_width * benchmark_height * 2;
2043
2044 align_buffer_page_end(orig_y, y_plane_size * 3);
2045 uint8* dst_opt = orig_y + y_plane_size;
2046 uint8* dst_c = orig_y + y_plane_size * 2;
2047
2048 MemRandomize(orig_y, y_plane_size);
2049 memset(dst_c, 0, y_plane_size);
2050 memset(dst_opt, 1, y_plane_size);
2051
2052 for (i = 0; i < y_plane_size / 2; ++i) {
2053 reinterpret_cast<uint16*>(orig_y)[i] &= mask;
2054 }
2055
2056 // Disable all optimizations.
2057 MaskCpuFlags(disable_cpu_flags);
2058 for (j = 0; j < benchmark_iterations; j++) {
2059 HalfFloatPlane(reinterpret_cast<uint16*>(orig_y), benchmark_width * 2,
2060 reinterpret_cast<uint16*>(dst_c), benchmark_width * 2, scale,
2061 benchmark_width, benchmark_height);
2062 }
2063
2064 // Enable optimizations.
2065 MaskCpuFlags(benchmark_cpu_info);
2066 for (j = 0; j < benchmark_iterations; j++) {
2067 HalfFloatPlane(reinterpret_cast<uint16*>(orig_y), benchmark_width * 2,
2068 reinterpret_cast<uint16*>(dst_opt), benchmark_width * 2,
2069 scale, benchmark_width, benchmark_height);
2070 }
2071
2072 int max_diff = 0;
2073 for (i = 0; i < y_plane_size / 2; ++i) {
2074 int abs_diff = abs(static_cast<int>(reinterpret_cast<uint16*>(dst_c)[i]) -
2075 static_cast<int>(reinterpret_cast<uint16*>(dst_opt)[i]));
2076 if (abs_diff > max_diff) {
2077 max_diff = abs_diff;
2078 }
2079 }
2080
2081 free_aligned_buffer_page_end(orig_y);
2082 return max_diff;
2083 }
2084
#if defined(__arm__)
// Reads FPSCR, ORs in 0x1000000 (bit 24) and writes the result back. Only
// compiled when __arm__ is defined.
// NOTE(review): going by the function name, bit 24 is the flush-to-zero
// control; confirm against the ARM architecture reference.
static void EnableFlushDenormalToZero(void) {
  uint32_t fpscr_value;  // scratch register operand for the sequence below
  __asm__ __volatile__(
      "vmrs %0, fpscr \n"
      "orr %0, %0, #0x1000000 \n"
      "vmsr fpscr, %0 \n"
      : "=r"(fpscr_value)
      :
      : "memory");
}
#endif
2095
// 5 bit exponent with bias of 15 will underflow to a denormal if scale causes
// exponent to be less than 0. 15 - log2(65536) = -1. This shouldn't normally
// happen since scale is 1/(1<<bits) where bits is 9, 10 or 12.
2099
// HalfFloatPlane with scale 1/65536 over unmasked 16 bit samples (mask 65535).
// Both runs must agree exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_denormal) {
// 32 bit arm rounding on denormal case is off by 1 compared to C.
#if defined(__arm__)
  EnableFlushDenormalToZero();
#endif
  const float kScale = 1.0f / 65536.0f;
  const int kMask = 65535;
  const int worst_delta = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, worst_delta);
}
2110
// HalfFloatPlane with scale 1.0 over unmasked 16 bit samples (mask 65535).
// The two runs may differ by at most 1.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_One) {
  const float kScale = 1.0f;
  const int kMask = 65535;
  const int worst_delta = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_LE(worst_delta, 1);
}
2117
// HalfFloatPlane with scale 1/4096 over unmasked 16 bit samples (mask 65535).
// Both runs must agree exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_Opt) {
  const float kScale = 1.0f / 4096.0f;
  const int kMask = 65535;
  const int worst_delta = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, worst_delta);
}
2124
// HalfFloatPlane with scale 1/1024 over samples masked to 10 bits (1023).
// Both runs must agree exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_Opt) {
  const float kScale = 1.0f / 1024.0f;
  const int kMask = 1023;
  const int worst_delta = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, worst_delta);
}
2131
// HalfFloatPlane with scale 1/512 over samples masked to 9 bits (511).
// Both runs must agree exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_9bit_Opt) {
  const float kScale = 1.0f / 512.0f;
  const int kMask = 511;
  const int worst_delta = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, worst_delta);
}
2138
// HalfFloatPlane with scale 1/4096 over samples masked to 12 bits (4095).
// Both runs must agree exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Opt) {
  const float kScale = 1.0f / 4096.0f;
  const int kMask = 4095;
  const int worst_delta = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, worst_delta);
}
2145
// HalfFloatPlane with scale 1/4095 (note: not 1/4096) over samples masked to
// 12 bits (4095). Both runs must agree exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Offby1) {
  const float kScale = 1.0f / 4095.0f;
  const int kMask = 4095;
  const int worst_delta = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, worst_delta);
}
2152
// HalfFloatPlane with scale 1.0 over samples masked to 11 bits (2047).
// Both runs must agree exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_One) {
  const float kScale = 1.0f;
  const int kMask = 2047;
  const int worst_delta = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, worst_delta);
}
2159
// HalfFloatPlane with scale 1.0 over samples masked to 12 bits (4095).
// The two runs may differ by at most 1.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_12bit_One) {
  const float kScale = 1.0f;
  const int kMask = 4095;
  const int worst_delta = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_LE(worst_delta, 1);
}
2166
// Exercises ARGBLumaColorTable with a 32768 entry table in two stages:
//   1. four hand-picked pixels are checked against hard-coded results;
//   2. a 1280 pixel ramp is processed once after
//      MaskCpuFlags(disable_cpu_flags_) and benchmark_pixels_div1280_ times
//      after MaskCpuFlags(benchmark_cpu_info_), and the two outputs must be
//      identical in every channel.
TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) {
  SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
  SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]);
  SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  // Entry i is assigned 3 * i (converted to the table's element type).
  const int kTableSize = 32768;
  align_buffer_page_end(lumacolortable, kTableSize);
  for (int i = 0, v = 0; i < kTableSize; ++i, v += 3) {
    lumacolortable[i] = v;
  }

  // Stage 1 inputs, one row per pixel.
  static const uint8 kProbePixels[4][4] = {
      {255u, 0u, 0u, 128u},    // Test blue
      {0u, 255u, 0u, 0u},      // Test green
      {0u, 0u, 255u, 255u},    // Test red
      {16u, 64u, 192u, 224u},  // Test color
  };
  // Stage 1 expected outputs, same layout as kProbePixels.
  static const unsigned int kExpected[4][4] = {
      {253u, 0u, 0u, 128u},
      {0u, 253u, 0u, 0u},
      {0u, 0u, 253u, 255u},
      {48u, 192u, 64u, 224u},
  };
  memcpy(&orig_pixels[0][0], &kProbePixels[0][0], sizeof(kProbePixels));

  // Do 16 to test asm version.
  ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
                     &lumacolortable[0], 16, 1);
  for (int px = 0; px < 4; ++px) {
    for (int ch = 0; ch < 4; ++ch) {
      EXPECT_EQ(kExpected[px][ch], dst_pixels_opt[px][ch]);
    }
  }

  // Stage 2 input: a ramp, with each channel advancing at a different rate.
  for (int px = 0; px < 1280; ++px) {
    uint8* const pixel = orig_pixels[px];
    pixel[0] = px;
    pixel[1] = px / 2;
    pixel[2] = px / 3;
    pixel[3] = px;
  }

  // Reference output.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0,
                     lumacolortable, 1280, 1);

  // Output under test, repeated for benchmarking.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int pass = 0; pass < benchmark_pixels_div1280_; ++pass) {
    ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
                       lumacolortable, 1280, 1);
  }

  for (int px = 0; px < 1280; ++px) {
    for (int ch = 0; ch < 4; ++ch) {
      EXPECT_EQ(dst_pixels_c[px][ch], dst_pixels_opt[px][ch]);
    }
  }

  free_aligned_buffer_page_end(lumacolortable);
}
2244
// Runs ARGBCopyAlpha from a random source into two destinations that start
// with identical random contents: once after MaskCpuFlags(disable_cpu_flags_)
// and benchmark_iterations_ times after MaskCpuFlags(benchmark_cpu_info_).
// Every destination byte must match between the two.
TEST_F(LibYUVPlanarTest, TestARGBCopyAlpha) {
  const int kSize = benchmark_width_ * benchmark_height_ * 4;
  const int kRowBytes = benchmark_width_ * 4;
  align_buffer_page_end(orig_pixels, kSize);
  align_buffer_page_end(dst_pixels_opt, kSize);
  align_buffer_page_end(dst_pixels_c, kSize);

  MemRandomize(orig_pixels, kSize);
  MemRandomize(dst_pixels_opt, kSize);
  // Identical starting contents, so bytes left untouched by the call compare
  // equal.
  memcpy(dst_pixels_c, dst_pixels_opt, kSize);

  // Reference output.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBCopyAlpha(orig_pixels, kRowBytes, dst_pixels_c, kRowBytes,
                benchmark_width_, benchmark_height_);

  // Output under test, repeated for benchmarking.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    ARGBCopyAlpha(orig_pixels, kRowBytes, dst_pixels_opt, kRowBytes,
                  benchmark_width_, benchmark_height_);
  }

  for (int n = 0; n < kSize; ++n) {
    EXPECT_EQ(dst_pixels_c[n], dst_pixels_opt[n]);
  }

  free_aligned_buffer_page_end(dst_pixels_c);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(orig_pixels);
}
2272
// Runs ARGBExtractAlpha from a random 4-byte-per-pixel source into two
// 1-byte-per-pixel destinations that start with identical random contents:
// once after MaskCpuFlags(disable_cpu_flags_) and benchmark_iterations_ times
// after MaskCpuFlags(benchmark_cpu_info_). Every destination byte must match.
TEST_F(LibYUVPlanarTest, TestARGBExtractAlpha) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kSrcBytes = kPixels * 4;
  const int kSrcStride = benchmark_width_ * 4;
  const int kDstStride = benchmark_width_;
  align_buffer_page_end(src_pixels, kSrcBytes);
  align_buffer_page_end(dst_pixels_opt, kPixels);
  align_buffer_page_end(dst_pixels_c, kPixels);

  MemRandomize(src_pixels, kSrcBytes);
  MemRandomize(dst_pixels_opt, kPixels);
  memcpy(dst_pixels_c, dst_pixels_opt, kPixels);

  // Reference output.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBExtractAlpha(src_pixels, kSrcStride, dst_pixels_c, kDstStride,
                   benchmark_width_, benchmark_height_);

  // Output under test, repeated for benchmarking.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    ARGBExtractAlpha(src_pixels, kSrcStride, dst_pixels_opt, kDstStride,
                     benchmark_width_, benchmark_height_);
  }

  for (int n = 0; n < kPixels; ++n) {
    EXPECT_EQ(dst_pixels_c[n], dst_pixels_opt[n]);
  }

  free_aligned_buffer_page_end(dst_pixels_c);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(src_pixels);
}
2300
// Runs ARGBCopyYToAlpha from a random 1-byte-per-pixel source into two
// 4-byte-per-pixel destinations that start with identical random contents:
// once after MaskCpuFlags(disable_cpu_flags_) and benchmark_iterations_ times
// after MaskCpuFlags(benchmark_cpu_info_). Every destination byte must match.
TEST_F(LibYUVPlanarTest, TestARGBCopyYToAlpha) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kDstBytes = kPixels * 4;
  const int kSrcStride = benchmark_width_;
  const int kDstStride = benchmark_width_ * 4;
  align_buffer_page_end(orig_pixels, kPixels);
  align_buffer_page_end(dst_pixels_opt, kDstBytes);
  align_buffer_page_end(dst_pixels_c, kDstBytes);

  MemRandomize(orig_pixels, kPixels);
  MemRandomize(dst_pixels_opt, kDstBytes);
  // Identical starting contents, so bytes left untouched by the call compare
  // equal.
  memcpy(dst_pixels_c, dst_pixels_opt, kDstBytes);

  // Reference output.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBCopyYToAlpha(orig_pixels, kSrcStride, dst_pixels_c, kDstStride,
                   benchmark_width_, benchmark_height_);

  // Output under test, repeated for benchmarking.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    ARGBCopyYToAlpha(orig_pixels, kSrcStride, dst_pixels_opt, kDstStride,
                     benchmark_width_, benchmark_height_);
  }

  for (int n = 0; n < kDstBytes; ++n) {
    EXPECT_EQ(dst_pixels_c[n], dst_pixels_opt[n]);
  }

  free_aligned_buffer_page_end(dst_pixels_c);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(orig_pixels);
}
2328
TestARGBRect(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off,int bpp)2329 static int TestARGBRect(int width,
2330 int height,
2331 int benchmark_iterations,
2332 int disable_cpu_flags,
2333 int benchmark_cpu_info,
2334 int invert,
2335 int off,
2336 int bpp) {
2337 if (width < 1) {
2338 width = 1;
2339 }
2340 const int kStride = width * bpp;
2341 const int kSize = kStride * height;
2342 const uint32 v32 = fastrand() & (bpp == 4 ? 0xffffffff : 0xff);
2343
2344 align_buffer_page_end(dst_argb_c, kSize + off);
2345 align_buffer_page_end(dst_argb_opt, kSize + off);
2346
2347 MemRandomize(dst_argb_c + off, kSize);
2348 memcpy(dst_argb_opt + off, dst_argb_c + off, kSize);
2349
2350 MaskCpuFlags(disable_cpu_flags);
2351 if (bpp == 4) {
2352 ARGBRect(dst_argb_c + off, kStride, 0, 0, width, invert * height, v32);
2353 } else {
2354 SetPlane(dst_argb_c + off, kStride, width, invert * height, v32);
2355 }
2356
2357 MaskCpuFlags(benchmark_cpu_info);
2358 for (int i = 0; i < benchmark_iterations; ++i) {
2359 if (bpp == 4) {
2360 ARGBRect(dst_argb_opt + off, kStride, 0, 0, width, invert * height, v32);
2361 } else {
2362 SetPlane(dst_argb_opt + off, kStride, width, invert * height, v32);
2363 }
2364 }
2365 int max_diff = 0;
2366 for (int i = 0; i < kStride * height; ++i) {
2367 int abs_diff = abs(static_cast<int>(dst_argb_c[i + off]) -
2368 static_cast<int>(dst_argb_opt[i + off]));
2369 if (abs_diff > max_diff) {
2370 max_diff = abs_diff;
2371 }
2372 }
2373 free_aligned_buffer_page_end(dst_argb_c);
2374 free_aligned_buffer_page_end(dst_argb_opt);
2375 return max_diff;
2376 }
2377
// ARGBRect (bpp = 4) with a width one less than the benchmark width, normal
// orientation and no destination offset.
TEST_F(LibYUVPlanarTest, ARGBRect_Any) {
  const int kWidth = benchmark_width_ - 1;
  const int kInvert = +1;
  const int kOffset = 0;
  const int kBpp = 4;
  const int worst_delta = TestARGBRect(
      kWidth, benchmark_height_, benchmark_iterations_, disable_cpu_flags_,
      benchmark_cpu_info_, kInvert, kOffset, kBpp);
  // Both buffers must be byte-identical.
  EXPECT_EQ(0, worst_delta);
}
2384
// ARGBRect (bpp = 4) at full benchmark size with the destination pointers
// advanced by one byte.
TEST_F(LibYUVPlanarTest, ARGBRect_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int kBpp = 4;
  const int worst_delta = TestARGBRect(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset, kBpp);
  // Both buffers must be byte-identical.
  EXPECT_EQ(0, worst_delta);
}
2391
// ARGBRect (bpp = 4) at full benchmark size with invert = -1, so the helper
// passes a negated height to the library.
TEST_F(LibYUVPlanarTest, ARGBRect_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int kBpp = 4;
  const int worst_delta = TestARGBRect(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset, kBpp);
  // Both buffers must be byte-identical.
  EXPECT_EQ(0, worst_delta);
}
2398
// ARGBRect (bpp = 4) at full benchmark size, normal orientation, no offset.
TEST_F(LibYUVPlanarTest, ARGBRect_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int kBpp = 4;
  const int worst_delta = TestARGBRect(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset, kBpp);
  // Both buffers must be byte-identical.
  EXPECT_EQ(0, worst_delta);
}
2405
// SetPlane (TestARGBRect with bpp = 1) with a width one less than the
// benchmark width, normal orientation and no destination offset.
TEST_F(LibYUVPlanarTest, SetPlane_Any) {
  const int kWidth = benchmark_width_ - 1;
  const int kInvert = +1;
  const int kOffset = 0;
  const int kBpp = 1;
  const int worst_delta = TestARGBRect(
      kWidth, benchmark_height_, benchmark_iterations_, disable_cpu_flags_,
      benchmark_cpu_info_, kInvert, kOffset, kBpp);
  // Both buffers must be byte-identical.
  EXPECT_EQ(0, worst_delta);
}
2412
// SetPlane (TestARGBRect with bpp = 1) at full benchmark size with the
// destination pointers advanced by one byte.
TEST_F(LibYUVPlanarTest, SetPlane_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int kBpp = 1;
  const int worst_delta = TestARGBRect(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset, kBpp);
  // Both buffers must be byte-identical.
  EXPECT_EQ(0, worst_delta);
}
2419
// SetPlane (TestARGBRect with bpp = 1) at full benchmark size with
// invert = -1, so the helper passes a negated height to the library.
TEST_F(LibYUVPlanarTest, SetPlane_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int kBpp = 1;
  const int worst_delta = TestARGBRect(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset, kBpp);
  // Both buffers must be byte-identical.
  EXPECT_EQ(0, worst_delta);
}
2426
// SetPlane (TestARGBRect with bpp = 1) at full benchmark size, normal
// orientation, no offset.
TEST_F(LibYUVPlanarTest, SetPlane_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int kBpp = 1;
  const int worst_delta = TestARGBRect(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset, kBpp);
  // Both buffers must be byte-identical.
  EXPECT_EQ(0, worst_delta);
}
2433
// Benchmarks MergeUVPlane. A random 2-byte-per-pixel source is split with
// SplitUVPlane and merged back with MergeUVPlane once after
// MaskCpuFlags(disable_cpu_flags_) to produce the reference. After
// MaskCpuFlags(benchmark_cpu_info_) the split is redone once and the merge is
// run benchmark_iterations_ times; both merged outputs must match byte for
// byte.
TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kUVBytes = kPixels * 2;
  const int kUVStride = benchmark_width_ * 2;   // interleaved plane
  const int kPlaneStride = benchmark_width_;    // each split plane
  align_buffer_page_end(src_pixels, kUVBytes);
  align_buffer_page_end(tmp_pixels_u, kPixels);
  align_buffer_page_end(tmp_pixels_v, kPixels);
  align_buffer_page_end(dst_pixels_opt, kUVBytes);
  align_buffer_page_end(dst_pixels_c, kUVBytes);

  MemRandomize(src_pixels, kUVBytes);
  MemRandomize(tmp_pixels_u, kPixels);
  MemRandomize(tmp_pixels_v, kPixels);
  MemRandomize(dst_pixels_opt, kUVBytes);
  MemRandomize(dst_pixels_c, kUVBytes);

  // Reference: split then merge.
  MaskCpuFlags(disable_cpu_flags_);
  SplitUVPlane(src_pixels, kUVStride, tmp_pixels_u, kPlaneStride, tmp_pixels_v,
               kPlaneStride, benchmark_width_, benchmark_height_);
  MergeUVPlane(tmp_pixels_u, kPlaneStride, tmp_pixels_v, kPlaneStride,
               dst_pixels_c, kUVStride, benchmark_width_, benchmark_height_);

  // Under test: one split, then the merge repeated for benchmarking.
  MaskCpuFlags(benchmark_cpu_info_);
  SplitUVPlane(src_pixels, kUVStride, tmp_pixels_u, kPlaneStride, tmp_pixels_v,
               kPlaneStride, benchmark_width_, benchmark_height_);
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    MergeUVPlane(tmp_pixels_u, kPlaneStride, tmp_pixels_v, kPlaneStride,
                 dst_pixels_opt, kUVStride, benchmark_width_,
                 benchmark_height_);
  }

  for (int n = 0; n < kUVBytes; ++n) {
    EXPECT_EQ(dst_pixels_c[n], dst_pixels_opt[n]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(tmp_pixels_u);
  free_aligned_buffer_page_end(tmp_pixels_v);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
2477
// Benchmarks SplitUVPlane. A random 2-byte-per-pixel source is split with
// SplitUVPlane and merged back with MergeUVPlane once after
// MaskCpuFlags(disable_cpu_flags_) to produce the reference. After
// MaskCpuFlags(benchmark_cpu_info_) the split is run benchmark_iterations_
// times followed by a single merge; both merged outputs must match byte for
// byte.
TEST_F(LibYUVPlanarTest, SplitUVPlane_Opt) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kUVBytes = kPixels * 2;
  const int kUVStride = benchmark_width_ * 2;   // interleaved plane
  const int kPlaneStride = benchmark_width_;    // each split plane
  align_buffer_page_end(src_pixels, kUVBytes);
  align_buffer_page_end(tmp_pixels_u, kPixels);
  align_buffer_page_end(tmp_pixels_v, kPixels);
  align_buffer_page_end(dst_pixels_opt, kUVBytes);
  align_buffer_page_end(dst_pixels_c, kUVBytes);

  MemRandomize(src_pixels, kUVBytes);
  MemRandomize(tmp_pixels_u, kPixels);
  MemRandomize(tmp_pixels_v, kPixels);
  MemRandomize(dst_pixels_opt, kUVBytes);
  MemRandomize(dst_pixels_c, kUVBytes);

  // Reference: split then merge.
  MaskCpuFlags(disable_cpu_flags_);
  SplitUVPlane(src_pixels, kUVStride, tmp_pixels_u, kPlaneStride, tmp_pixels_v,
               kPlaneStride, benchmark_width_, benchmark_height_);
  MergeUVPlane(tmp_pixels_u, kPlaneStride, tmp_pixels_v, kPlaneStride,
               dst_pixels_c, kUVStride, benchmark_width_, benchmark_height_);

  // Under test: the split repeated for benchmarking, then one merge.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    SplitUVPlane(src_pixels, kUVStride, tmp_pixels_u, kPlaneStride,
                 tmp_pixels_v, kPlaneStride, benchmark_width_,
                 benchmark_height_);
  }
  MergeUVPlane(tmp_pixels_u, kPlaneStride, tmp_pixels_v, kPlaneStride,
               dst_pixels_opt, kUVStride, benchmark_width_, benchmark_height_);

  for (int n = 0; n < kUVBytes; ++n) {
    EXPECT_EQ(dst_pixels_c[n], dst_pixels_opt[n]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(tmp_pixels_u);
  free_aligned_buffer_page_end(tmp_pixels_v);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
2520
2521 } // namespace libyuv
2522