1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <math.h>
12 #include <stdlib.h>
13 #include <time.h>
14
15 #include "../unit_test/unit_test.h"
16 #include "libyuv/compare.h"
17 #include "libyuv/convert.h"
18 #include "libyuv/convert_argb.h"
19 #include "libyuv/convert_from.h"
20 #include "libyuv/convert_from_argb.h"
21 #include "libyuv/cpu_id.h"
22 #include "libyuv/planar_functions.h"
23 #include "libyuv/rotate.h"
24 #include "libyuv/scale.h"
25
26 #ifdef ENABLE_ROW_TESTS
27 // row.h defines SIMD_ALIGNED, overriding unit_test.h
28 // TODO(fbarchard): Remove row.h from unittests. Test public functions.
29 #include "libyuv/row.h" /* For ScaleSumSamples_Neon */
30 #endif
31
// Tolerance the attenuate/unattenuate tests in this file pass to EXPECT_LE and
// EXPECT_NEAR: 0 when LIBYUV_BIT_EXACT is defined, 2 otherwise.
#ifdef LIBYUV_BIT_EXACT
#define EXPECTED_ATTENUATE_DIFF 0
#else
#define EXPECTED_ATTENUATE_DIFF 2
#endif
37
38 namespace libyuv {
39
// Runs ARGBUnattenuate on four hand-picked pixels and checks every output
// byte, then attenuates a 1280 pixel ramp, unattenuates it and attenuates it
// again, requiring the two attenuated results to agree within 2 per byte.
TEST_F(LibYUVPlanarTest, TestAttenuate) {
  const int kWidth = 1280;
  const int kBytes = kWidth * 4;
  align_buffer_page_end(src, kBytes);
  align_buffer_page_end(premul, kBytes);
  align_buffer_page_end(unpremul, kBytes);
  align_buffer_page_end(premul2, kBytes);

  // Four 4-byte input pixels for ARGBUnattenuate.
  static const unsigned kUnattenuateIn[4 * 4] = {
      200u, 129u, 127u, 128u,  // Test unattenuation clamps.
      16u,  64u,  192u, 0u,    // Fourth byte 0.
      16u,  64u,  192u, 255u,  // Fourth byte 255.
      16u,  64u,  192u, 128u,  // Fourth byte 128.
  };
  // Bytes ARGBUnattenuate is required to produce for the pixels above.
  static const unsigned kUnattenuateOut[4 * 4] = {
      255u, 255u, 254u, 128u,  //
      0u,   0u,   0u,   0u,    //
      16u,  64u,  192u, 255u,  //
      32u,  128u, 255u, 128u,  //
  };
  for (int b = 0; b < 4 * 4; ++b) {
    src[b] = static_cast<uint8_t>(kUnattenuateIn[b]);
  }
  ARGBUnattenuate(src, 0, unpremul, 0, 4, 1);
  for (int b = 0; b < 4 * 4; ++b) {
    EXPECT_EQ(kUnattenuateOut[b], unpremul[b]) << "byte " << b;
  }

  // Ramp: bytes 0 and 3 follow the pixel index, bytes 1 and 2 follow
  // index / 2 and index / 3 (all truncated to 8 bits on store).
  for (int x = 0; x < kWidth; ++x) {
    uint8_t* px = src + x * 4;
    px[0] = x;
    px[1] = x / 2;
    px[2] = x / 3;
    px[3] = x;
  }
  ARGBAttenuate(src, 0, premul, 0, kWidth, 1);
  ARGBUnattenuate(premul, 0, unpremul, 0, kWidth, 1);
  for (int rep = 0; rep < benchmark_pixels_div1280_; ++rep) {
    ARGBAttenuate(unpremul, 0, premul2, 0, kWidth, 1);
  }
  for (int b = 0; b < kBytes; ++b) {
    EXPECT_NEAR(premul[b], premul2[b], 2);
  }

  // Make sure transparent, 50% and opaque are fully accurate.
  const uint8_t* transparent = premul + 0 * 4;
  EXPECT_EQ(0, transparent[0]);
  EXPECT_EQ(0, transparent[1]);
  EXPECT_EQ(0, transparent[2]);
  EXPECT_EQ(0, transparent[3]);
  const uint8_t* half = premul + 128 * 4;
  EXPECT_EQ(64, half[0]);
  EXPECT_EQ(32, half[1]);
  EXPECT_EQ(21, half[2]);
  EXPECT_EQ(128, half[3]);
  const uint8_t* opaque = premul + 255 * 4;
  EXPECT_NEAR(254, opaque[0], EXPECTED_ATTENUATE_DIFF);
  EXPECT_NEAR(127, opaque[1], EXPECTED_ATTENUATE_DIFF);
  EXPECT_NEAR(85, opaque[2], EXPECTED_ATTENUATE_DIFF);
  EXPECT_EQ(255, opaque[3]);

  free_aligned_buffer_page_end(premul2);
  free_aligned_buffer_page_end(unpremul);
  free_aligned_buffer_page_end(premul);
  free_aligned_buffer_page_end(src);
}
119
TestAttenuateI(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)120 static int TestAttenuateI(int width,
121 int height,
122 int benchmark_iterations,
123 int disable_cpu_flags,
124 int benchmark_cpu_info,
125 int invert,
126 int off) {
127 if (width < 1) {
128 width = 1;
129 }
130 const int kBpp = 4;
131 const int kStride = width * kBpp;
132 align_buffer_page_end(src_argb, kStride * height + off);
133 align_buffer_page_end(dst_argb_c, kStride * height);
134 align_buffer_page_end(dst_argb_opt, kStride * height);
135 for (int i = 0; i < kStride * height; ++i) {
136 src_argb[i + off] = (fastrand() & 0xff);
137 }
138 memset(dst_argb_c, 0, kStride * height);
139 memset(dst_argb_opt, 0, kStride * height);
140
141 MaskCpuFlags(disable_cpu_flags);
142 ARGBAttenuate(src_argb + off, kStride, dst_argb_c, kStride, width,
143 invert * height);
144 MaskCpuFlags(benchmark_cpu_info);
145 for (int i = 0; i < benchmark_iterations; ++i) {
146 ARGBAttenuate(src_argb + off, kStride, dst_argb_opt, kStride, width,
147 invert * height);
148 }
149 int max_diff = 0;
150 for (int i = 0; i < kStride * height; ++i) {
151 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
152 static_cast<int>(dst_argb_opt[i]));
153 if (abs_diff > max_diff) {
154 max_diff = abs_diff;
155 }
156 }
157 free_aligned_buffer_page_end(src_argb);
158 free_aligned_buffer_page_end(dst_argb_c);
159 free_aligned_buffer_page_end(dst_argb_opt);
160 return max_diff;
161 }
162
// ARGBAttenuate with a width one pixel larger than the benchmark width; the
// two runs made by TestAttenuateI must agree within EXPECTED_ATTENUATE_DIFF.
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Any) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int diff = TestAttenuateI(benchmark_width_ + 1, benchmark_height_,
                                  benchmark_iterations_, disable_cpu_flags_,
                                  benchmark_cpu_info_, kInvert, kOffset);

  EXPECT_LE(diff, EXPECTED_ATTENUATE_DIFF);
}
170
// ARGBAttenuate with the source pointer advanced by one byte; the two runs
// made by TestAttenuateI must agree within EXPECTED_ATTENUATE_DIFF.
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int diff = TestAttenuateI(benchmark_width_, benchmark_height_,
                                  benchmark_iterations_, disable_cpu_flags_,
                                  benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(diff, EXPECTED_ATTENUATE_DIFF);
}
177
// ARGBAttenuate called with a negated height (invert = -1); the two runs made
// by TestAttenuateI must agree within EXPECTED_ATTENUATE_DIFF.
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int diff = TestAttenuateI(benchmark_width_, benchmark_height_,
                                  benchmark_iterations_, disable_cpu_flags_,
                                  benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(diff, EXPECTED_ATTENUATE_DIFF);
}
184
// ARGBAttenuate at the benchmark size with no pointer offset and a positive
// height; the two runs made by TestAttenuateI must agree within
// EXPECTED_ATTENUATE_DIFF.
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int diff = TestAttenuateI(benchmark_width_, benchmark_height_,
                                  benchmark_iterations_, disable_cpu_flags_,
                                  benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(diff, EXPECTED_ATTENUATE_DIFF);
}
191
TestUnattenuateI(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)192 static int TestUnattenuateI(int width,
193 int height,
194 int benchmark_iterations,
195 int disable_cpu_flags,
196 int benchmark_cpu_info,
197 int invert,
198 int off) {
199 if (width < 1) {
200 width = 1;
201 }
202 const int kBpp = 4;
203 const int kStride = width * kBpp;
204 align_buffer_page_end(src_argb, kStride * height + off);
205 align_buffer_page_end(dst_argb_c, kStride * height);
206 align_buffer_page_end(dst_argb_opt, kStride * height);
207 for (int i = 0; i < kStride * height; ++i) {
208 src_argb[i + off] = (fastrand() & 0xff);
209 }
210 ARGBAttenuate(src_argb + off, kStride, src_argb + off, kStride, width,
211 height);
212 memset(dst_argb_c, 0, kStride * height);
213 memset(dst_argb_opt, 0, kStride * height);
214
215 MaskCpuFlags(disable_cpu_flags);
216 ARGBUnattenuate(src_argb + off, kStride, dst_argb_c, kStride, width,
217 invert * height);
218 MaskCpuFlags(benchmark_cpu_info);
219 for (int i = 0; i < benchmark_iterations; ++i) {
220 ARGBUnattenuate(src_argb + off, kStride, dst_argb_opt, kStride, width,
221 invert * height);
222 }
223 int max_diff = 0;
224 for (int i = 0; i < kStride * height; ++i) {
225 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
226 static_cast<int>(dst_argb_opt[i]));
227 if (abs_diff > max_diff) {
228 max_diff = abs_diff;
229 }
230 }
231 free_aligned_buffer_page_end(src_argb);
232 free_aligned_buffer_page_end(dst_argb_c);
233 free_aligned_buffer_page_end(dst_argb_opt);
234 return max_diff;
235 }
236
// ARGBUnattenuate with a width one pixel larger than the benchmark width; the
// two runs made by TestUnattenuateI must agree within EXPECTED_ATTENUATE_DIFF.
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Any) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int diff = TestUnattenuateI(benchmark_width_ + 1, benchmark_height_,
                                    benchmark_iterations_, disable_cpu_flags_,
                                    benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(diff, EXPECTED_ATTENUATE_DIFF);
}
243
// ARGBUnattenuate with the source pointer advanced by one byte; the two runs
// made by TestUnattenuateI must agree within EXPECTED_ATTENUATE_DIFF.
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
                                    benchmark_iterations_, disable_cpu_flags_,
                                    benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(diff, EXPECTED_ATTENUATE_DIFF);
}
250
// ARGBUnattenuate called with a negated height (invert = -1); the two runs
// made by TestUnattenuateI must agree within EXPECTED_ATTENUATE_DIFF.
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
                                    benchmark_iterations_, disable_cpu_flags_,
                                    benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(diff, EXPECTED_ATTENUATE_DIFF);
}
257
// ARGBUnattenuate at the benchmark size with no pointer offset and a positive
// height; the two runs made by TestUnattenuateI must agree within
// EXPECTED_ATTENUATE_DIFF.
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
                                    benchmark_iterations_, disable_cpu_flags_,
                                    benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(diff, EXPECTED_ATTENUATE_DIFF);
}
264
// Feeds ARGBComputeCumulativeSum a 16x16 image in which every pixel is
// {1, 2, 3, 255} and checks that entry (x, y) of the output holds each channel
// value multiplied by (x + 1) * (y + 1).
TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) {
  const int kDim = 16;
  SIMD_ALIGNED(uint8_t src[kDim][kDim][4]);
  SIMD_ALIGNED(int32_t sums[kDim][kDim][4]);

  for (int row = 0; row < kDim; ++row) {
    for (int col = 0; col < kDim; ++col) {
      uint8_t* px = src[row][col];
      px[0] = 1u;
      px[1] = 2u;
      px[2] = 3u;
      px[3] = 255u;
    }
  }

  ARGBComputeCumulativeSum(&src[0][0][0], kDim * 4, &sums[0][0][0], kDim * 4,
                           kDim, kDim);

  for (int row = 0; row < kDim; ++row) {
    for (int col = 0; col < kDim; ++col) {
      // Number of pixels in the rectangle from (0, 0) to (col, row) inclusive.
      const int count = (col + 1) * (row + 1);
      const int32_t* sum = sums[row][col];
      EXPECT_EQ(count, sum[0]);
      EXPECT_EQ(count * 2, sum[1]);
      EXPECT_EQ(count * 3, sum[2]);
      EXPECT_EQ(count * 255, sum[3]);
    }
  }
}
290
// EXPECT_NEAR (rather than EXPECT_EQ) is used for some values below for legacy platforms.
// Converts six reference pixels to gray in place with ARGBGray and checks the
// result, then runs ARGBGray over a 1280 pixel ramp
// benchmark_pixels_div1280_ times.
TEST_F(LibYUVPlanarTest, TestARGBGray) {
  const int kWidth = 1280;
  SIMD_ALIGNED(uint8_t pixels[kWidth][4]);
  memset(pixels, 0, sizeof(pixels));

  struct GrayCase {
    uint8_t in[4];   // Input pixel bytes.
    unsigned gray;   // Value required in bytes 0..2 after ARGBGray.
    int tolerance;   // 0 requires an exact match; otherwise EXPECT_NEAR slack.
  };
  static const GrayCase kCases[] = {
      {{255, 0, 0, 128}, 29u, 1},       // Test blue
      {{0, 255, 0, 0}, 149u, 0},        // Test green
      {{0, 0, 255, 255}, 77u, 1},       // Test red
      {{0, 0, 0, 255}, 0u, 0},          // Test black
      {{255, 255, 255, 255}, 255u, 0},  // Test white
      {{16, 64, 192, 224}, 97u, 1},     // Test color
  };
  const int kNumCases = sizeof(kCases) / sizeof(kCases[0]);

  for (int p = 0; p < kNumCases; ++p) {
    for (int c = 0; c < 4; ++c) {
      pixels[p][c] = kCases[p].in[c];
    }
  }
  // Do 16 to test asm version.
  ARGBGray(&pixels[0][0], 0, 0, 0, 16, 1);
  for (int p = 0; p < kNumCases; ++p) {
    for (int c = 0; c < 3; ++c) {
      if (kCases[p].tolerance == 0) {
        EXPECT_EQ(kCases[p].gray, pixels[p][c])
            << "pixel " << p << " channel " << c;
      } else {
        EXPECT_NEAR(kCases[p].gray, pixels[p][c], kCases[p].tolerance)
            << "pixel " << p << " channel " << c;
      }
    }
    // Byte 3 must come back exactly as it went in.
    EXPECT_EQ(static_cast<unsigned>(kCases[p].in[3]), pixels[p][3])
        << "pixel " << p;
  }

  // Ramp input for the benchmark loop.
  for (int x = 0; x < kWidth; ++x) {
    uint8_t* px = pixels[x];
    px[0] = x;
    px[1] = x / 2;
    px[2] = x / 3;
    px[3] = x;
  }
  for (int rep = 0; rep < benchmark_pixels_div1280_; ++rep) {
    ARGBGray(&pixels[0][0], 0, 0, 0, kWidth, 1);
  }
}
362
// Converts six reference pixels with ARGBGrayTo into a separate buffer and
// checks every output byte within 1, benchmarks ARGBGrayTo on a 1280 pixel
// ramp, and finally checks that in-place ARGBGray leaves 256 pixels whose four
// bytes are all equal to the pixel index unchanged.
TEST_F(LibYUVPlanarTest, TestARGBGrayTo) {
  const int kWidth = 1280;
  SIMD_ALIGNED(uint8_t src[kWidth][4]);
  SIMD_ALIGNED(uint8_t gray[kWidth][4]);
  memset(src, 0, sizeof(src));

  struct GrayToCase {
    uint8_t in[4];   // Input pixel bytes.
    unsigned gray;   // Expected value of output bytes 0..2 (within 1).
    unsigned alpha;  // Expected value of output byte 3 (within 1).
  };
  static const GrayToCase kCases[] = {
      {{255, 0, 0, 128}, 30u, 128u},        // Test blue
      {{0, 255, 0, 0}, 149u, 0u},           // Test green
      {{0, 0, 255, 255}, 76u, 255u},        // Test red
      {{0, 0, 0, 255}, 0u, 255u},           // Test black
      {{255, 255, 255, 255}, 255u, 255u},   // Test white
      {{16, 64, 192, 224}, 96u, 224u},      // Test color
  };
  const int kNumCases = sizeof(kCases) / sizeof(kCases[0]);

  for (int p = 0; p < kNumCases; ++p) {
    for (int c = 0; c < 4; ++c) {
      src[p][c] = kCases[p].in[c];
    }
  }
  // Do 16 to test asm version.
  ARGBGrayTo(&src[0][0], 0, &gray[0][0], 0, 16, 1);
  for (int p = 0; p < kNumCases; ++p) {
    for (int c = 0; c < 3; ++c) {
      EXPECT_NEAR(kCases[p].gray, gray[p][c], 1)
          << "pixel " << p << " channel " << c;
    }
    EXPECT_NEAR(kCases[p].alpha, gray[p][3], 1) << "pixel " << p;
  }

  // Ramp input for the benchmark loop.
  for (int x = 0; x < kWidth; ++x) {
    uint8_t* px = src[x];
    px[0] = x;
    px[1] = x / 2;
    px[2] = x / 3;
    px[3] = x;
  }
  for (int rep = 0; rep < benchmark_pixels_div1280_; ++rep) {
    ARGBGrayTo(&src[0][0], 0, &gray[0][0], 0, kWidth, 1);
  }

  // Pixels whose four bytes are equal must survive in-place ARGBGray.
  for (int v = 0; v < 256; ++v) {
    for (int c = 0; c < 4; ++c) {
      src[v][c] = v;
    }
  }
  ARGBGray(&src[0][0], 0, 0, 0, 256, 1);
  for (int v = 0; v < 256; ++v) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(v, src[v][c]);
    }
  }
}
448
// Applies ARGBSepia in place to six reference pixels and checks every output
// byte exactly, then runs ARGBSepia over a 1280 pixel ramp
// benchmark_pixels_div1280_ times.
TEST_F(LibYUVPlanarTest, TestARGBSepia) {
  const int kWidth = 1280;
  const int kNumCases = 6;
  SIMD_ALIGNED(uint8_t pixels[kWidth][4]);
  memset(pixels, 0, sizeof(pixels));

  static const uint8_t kInput[kNumCases][4] = {
      {255, 0, 0, 128},      // Test blue
      {0, 255, 0, 0},        // Test green
      {0, 0, 255, 255},      // Test red
      {0, 0, 0, 255},        // Test black
      {255, 255, 255, 255},  // Test white
      {16, 64, 192, 224},    // Test color
  };
  // Bytes ARGBSepia is required to produce for the matching kInput row.
  static const unsigned kExpected[kNumCases][4] = {
      {33u, 43u, 47u, 128u},     //
      {135u, 175u, 195u, 0u},    //
      {69u, 89u, 99u, 255u},     //
      {0u, 0u, 0u, 255u},        //
      {239u, 255u, 255u, 255u},  //
      {88u, 114u, 127u, 224u},   //
  };

  for (int p = 0; p < kNumCases; ++p) {
    for (int c = 0; c < 4; ++c) {
      pixels[p][c] = kInput[p][c];
    }
  }
  // Do 16 to test asm version.
  ARGBSepia(&pixels[0][0], 0, 0, 0, 16, 1);
  for (int p = 0; p < kNumCases; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpected[p][c], pixels[p][c])
          << "pixel " << p << " channel " << c;
    }
  }

  // Ramp input for the benchmark loop.
  for (int x = 0; x < kWidth; ++x) {
    uint8_t* px = pixels[x];
    px[0] = x;
    px[1] = x / 2;
    px[2] = x / 3;
    px[3] = x;
  }
  for (int rep = 0; rep < benchmark_pixels_div1280_; ++rep) {
    ARGBSepia(&pixels[0][0], 0, 0, 0, kWidth, 1);
  }
}
520
// Applies ARGBColorMatrix with a sepia matrix to four reference pixels and
// checks every output byte exactly, then compares the output produced under
// disable_cpu_flags_ with the output produced under benchmark_cpu_info_ on a
// 1280 pixel ramp.
TEST_F(LibYUVPlanarTest, TestARGBColorMatrix) {
  const int kWidth = 1280;
  const int kNumCases = 4;
  SIMD_ALIGNED(uint8_t src[kWidth][4]);
  SIMD_ALIGNED(uint8_t out_opt[kWidth][4]);
  SIMD_ALIGNED(uint8_t out_c[kWidth][4]);

  // Matrix for Sepia.
  SIMD_ALIGNED(static const int8_t kRGBToSepia[]) = {
      17 / 2, 68 / 2, 35 / 2, 0, 22 / 2, 88 / 2, 45 / 2, 0,
      24 / 2, 98 / 2, 50 / 2, 0, 0,      0,      0,      64,  // Copy alpha.
  };
  memset(src, 0, sizeof(src));

  static const uint8_t kInput[kNumCases][4] = {
      {255, 0, 0, 128},    // Test blue
      {0, 255, 0, 0},      // Test green
      {0, 0, 255, 255},    // Test red
      {16, 64, 192, 224},  // Test color
  };
  // Bytes ARGBColorMatrix is required to produce for the matching kInput row.
  static const unsigned kExpected[kNumCases][4] = {
      {31u, 43u, 47u, 128u},    //
      {135u, 175u, 195u, 0u},   //
      {67u, 87u, 99u, 255u},    //
      {87u, 112u, 127u, 224u},  //
  };

  for (int p = 0; p < kNumCases; ++p) {
    for (int c = 0; c < 4; ++c) {
      src[p][c] = kInput[p][c];
    }
  }
  // Do 16 to test asm version.
  ARGBColorMatrix(&src[0][0], 0, &out_opt[0][0], 0, &kRGBToSepia[0], 16, 1);
  for (int p = 0; p < kNumCases; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpected[p][c], out_opt[p][c])
          << "pixel " << p << " channel " << c;
    }
  }

  // Ramp input for the comparison of the two CPU-flag settings.
  for (int x = 0; x < kWidth; ++x) {
    uint8_t* px = src[x];
    px[0] = x;
    px[1] = x / 2;
    px[2] = x / 3;
    px[3] = x;
  }
  MaskCpuFlags(disable_cpu_flags_);
  ARGBColorMatrix(&src[0][0], 0, &out_c[0][0], 0, &kRGBToSepia[0], kWidth, 1);
  MaskCpuFlags(benchmark_cpu_info_);

  for (int rep = 0; rep < benchmark_pixels_div1280_; ++rep) {
    ARGBColorMatrix(&src[0][0], 0, &out_opt[0][0], 0, &kRGBToSepia[0], kWidth,
                    1);
  }

  for (int x = 0; x < kWidth; ++x) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(out_c[x][c], out_opt[x][c]);
    }
  }
}
596
// Applies RGBColorMatrix in place with a sepia matrix to four reference pixels
// and checks every output byte exactly, then runs it over a 1280 pixel ramp
// benchmark_pixels_div1280_ times.
TEST_F(LibYUVPlanarTest, TestRGBColorMatrix) {
  const int kWidth = 1280;
  const int kNumCases = 4;
  SIMD_ALIGNED(uint8_t pixels[kWidth][4]);

  // Matrix for Sepia.
  SIMD_ALIGNED(static const int8_t kRGBToSepia[]) = {
      17, 68, 35, 0, 22, 88, 45, 0,
      24, 98, 50, 0, 0,  0,  0,  0,  // Unused but makes matrix 16 bytes.
  };
  memset(pixels, 0, sizeof(pixels));

  static const uint8_t kInput[kNumCases][4] = {
      {255, 0, 0, 128},    // Test blue
      {0, 255, 0, 0},      // Test green
      {0, 0, 255, 255},    // Test red
      {16, 64, 192, 224},  // Test color
  };
  // Bytes RGBColorMatrix is required to leave in the matching kInput pixel.
  static const unsigned kExpected[kNumCases][4] = {
      {31u, 43u, 47u, 128u},    //
      {135u, 175u, 195u, 0u},   //
      {67u, 87u, 99u, 255u},    //
      {87u, 112u, 127u, 224u},  //
  };

  for (int p = 0; p < kNumCases; ++p) {
    for (int c = 0; c < 4; ++c) {
      pixels[p][c] = kInput[p][c];
    }
  }
  // Do 16 to test asm version.
  RGBColorMatrix(&pixels[0][0], 0, &kRGBToSepia[0], 0, 0, 16, 1);
  for (int p = 0; p < kNumCases; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpected[p][c], pixels[p][c])
          << "pixel " << p << " channel " << c;
    }
  }

  // Ramp input for the benchmark loop.
  for (int x = 0; x < kWidth; ++x) {
    uint8_t* px = pixels[x];
    px[0] = x;
    px[1] = x / 2;
    px[2] = x / 3;
    px[3] = x;
  }
  for (int rep = 0; rep < benchmark_pixels_div1280_; ++rep) {
    RGBColorMatrix(&pixels[0][0], 0, &kRGBToSepia[0], 0, 0, kWidth, 1);
  }
}
656
// Applies ARGBColorTable in place to four reference pixels and checks that
// all four bytes of each pixel were replaced by table entries, then runs it
// over a 1280 pixel ramp benchmark_pixels_div1280_ times.
TEST_F(LibYUVPlanarTest, TestARGBColorTable) {
  const int kWidth = 1280;
  const int kNumCases = 4;
  SIMD_ALIGNED(uint8_t pixels[kWidth][4]);
  memset(pixels, 0, sizeof(pixels));

  // Lookup table; only the first 16 bytes are non-zero.
  static const uint8_t kARGBTable[256 * 4] = {
      1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u, 16u,
  };

  static const uint8_t kInput[kNumCases][4] = {
      {0, 0, 0, 0},
      {1, 1, 1, 1},
      {2, 2, 2, 2},
      {0, 1, 2, 3},
  };
  // Bytes ARGBColorTable is required to leave in the matching kInput pixel.
  static const unsigned kExpected[kNumCases][4] = {
      {1u, 2u, 3u, 4u},
      {5u, 6u, 7u, 8u},
      {9u, 10u, 11u, 12u},
      {1u, 6u, 11u, 16u},
  };

  for (int p = 0; p < kNumCases; ++p) {
    for (int c = 0; c < 4; ++c) {
      pixels[p][c] = kInput[p][c];
    }
  }
  // Do 16 to test asm version.
  ARGBColorTable(&pixels[0][0], 0, &kARGBTable[0], 0, 0, 16, 1);
  for (int p = 0; p < kNumCases; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpected[p][c], pixels[p][c])
          << "pixel " << p << " channel " << c;
    }
  }

  // Ramp input for the benchmark loop.
  for (int x = 0; x < kWidth; ++x) {
    uint8_t* px = pixels[x];
    px[0] = x;
    px[1] = x / 2;
    px[2] = x / 3;
    px[3] = x;
  }
  for (int rep = 0; rep < benchmark_pixels_div1280_; ++rep) {
    ARGBColorTable(&pixels[0][0], 0, &kARGBTable[0], 0, 0, kWidth, 1);
  }
}
711
712 // Same as TestARGBColorTable except alpha does not change.
// Applies RGBColorTable in place to four reference pixels and checks that
// bytes 0..2 were replaced by table entries while byte 3 kept its input
// value, then runs it over a 1280 pixel ramp benchmark_pixels_div1280_ times.
TEST_F(LibYUVPlanarTest, TestRGBColorTable) {
  const int kWidth = 1280;
  const int kNumCases = 4;
  SIMD_ALIGNED(uint8_t pixels[kWidth][4]);
  memset(pixels, 0, sizeof(pixels));

  // Lookup table; only the first 16 bytes are non-zero.
  static const uint8_t kARGBTable[256 * 4] = {
      1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u, 16u,
  };

  static const uint8_t kInput[kNumCases][4] = {
      {0, 0, 0, 0},
      {1, 1, 1, 1},
      {2, 2, 2, 2},
      {0, 1, 2, 3},
  };
  // Bytes RGBColorTable is required to leave in the matching kInput pixel.
  static const unsigned kExpected[kNumCases][4] = {
      {1u, 2u, 3u, 0u},    // Alpha unchanged.
      {5u, 6u, 7u, 1u},    // Alpha unchanged.
      {9u, 10u, 11u, 2u},  // Alpha unchanged.
      {1u, 6u, 11u, 3u},   // Alpha unchanged.
  };

  for (int p = 0; p < kNumCases; ++p) {
    for (int c = 0; c < 4; ++c) {
      pixels[p][c] = kInput[p][c];
    }
  }
  // Do 16 to test asm version.
  RGBColorTable(&pixels[0][0], 0, &kARGBTable[0], 0, 0, 16, 1);
  for (int p = 0; p < kNumCases; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpected[p][c], pixels[p][c])
          << "pixel " << p << " channel " << c;
    }
  }

  // Ramp input for the benchmark loop.
  for (int x = 0; x < kWidth; ++x) {
    uint8_t* px = pixels[x];
    px[0] = x;
    px[1] = x / 2;
    px[2] = x / 3;
    px[3] = x;
  }
  for (int rep = 0; rep < benchmark_pixels_div1280_; ++rep) {
    RGBColorTable(&pixels[0][0], 0, &kARGBTable[0], 0, 0, kWidth, 1);
  }
}
767
// Quantizes a 1280 pixel ramp in place with ARGBQuantize using an interval of
// 8 and an offset of 4, checks bytes 0..2 against the expected quantized value
// and byte 3 against its input, then repeats the call
// benchmark_pixels_div1280_ times.
TEST_F(LibYUVPlanarTest, TestARGBQuantize) {
  const int kWidth = 1280;
  const int kInterval = 8;
  const int kOffset = kInterval / 2;
  const int kScale = (65536 + kOffset) / kInterval;
  SIMD_ALIGNED(uint8_t pixels[kWidth][4]);

  for (int x = 0; x < kWidth; ++x) {
    uint8_t* px = pixels[x];
    px[0] = x;
    px[1] = x / 2;
    px[2] = x / 3;
    px[3] = x;
  }
  ARGBQuantize(&pixels[0][0], 0, kScale, kInterval, kOffset, 0, 0, kWidth, 1);

  for (int x = 0; x < kWidth; ++x) {
    const uint8_t* px = pixels[x];
    EXPECT_EQ((x / kInterval * kInterval + kOffset) & 255, px[0]);
    EXPECT_EQ((x / 2 / kInterval * kInterval + kOffset) & 255, px[1]);
    EXPECT_EQ((x / 3 / kInterval * kInterval + kOffset) & 255, px[2]);
    EXPECT_EQ(x & 255, px[3]);
  }
  for (int rep = 0; rep < benchmark_pixels_div1280_; ++rep) {
    ARGBQuantize(&pixels[0][0], 0, kScale, kInterval, kOffset, 0, 0, kWidth,
                 1);
  }
}
791
// Runs ARGBMirror on a random benchmark-sized image once under
// disable_cpu_flags_ and benchmark_iterations_ times under
// benchmark_cpu_info_, and requires the two outputs to match byte for byte.
TEST_F(LibYUVPlanarTest, ARGBMirror_Opt) {
  const int kStride = benchmark_width_ * 4;
  const int kSize = benchmark_width_ * benchmark_height_ * 4;
  align_buffer_page_end(src_pixels, kSize);
  align_buffer_page_end(dst_pixels_opt, kSize);
  align_buffer_page_end(dst_pixels_c, kSize);

  MemRandomize(src_pixels, kSize);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBMirror(src_pixels, kStride, dst_pixels_c, kStride, benchmark_width_,
             benchmark_height_);

  // Benchmarked pass.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int rep = 0; rep < benchmark_iterations_; ++rep) {
    ARGBMirror(src_pixels, kStride, dst_pixels_opt, kStride, benchmark_width_,
               benchmark_height_);
  }

  for (int b = 0; b < kSize; ++b) {
    EXPECT_EQ(dst_pixels_c[b], dst_pixels_opt[b]);
  }
  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
815
// Runs MirrorPlane on a random benchmark-sized single-byte plane once under
// disable_cpu_flags_ and benchmark_iterations_ times under
// benchmark_cpu_info_, and requires the two outputs to match byte for byte.
TEST_F(LibYUVPlanarTest, MirrorPlane_Opt) {
  const int kStride = benchmark_width_;
  const int kSize = benchmark_width_ * benchmark_height_;
  align_buffer_page_end(src_pixels, kSize);
  align_buffer_page_end(dst_pixels_opt, kSize);
  align_buffer_page_end(dst_pixels_c, kSize);

  MemRandomize(src_pixels, kSize);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  MirrorPlane(src_pixels, kStride, dst_pixels_c, kStride, benchmark_width_,
              benchmark_height_);

  // Benchmarked pass.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int rep = 0; rep < benchmark_iterations_; ++rep) {
    MirrorPlane(src_pixels, kStride, dst_pixels_opt, kStride, benchmark_width_,
                benchmark_height_);
  }

  for (int b = 0; b < kSize; ++b) {
    EXPECT_EQ(dst_pixels_c[b], dst_pixels_opt[b]);
  }
  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
838
// Runs MirrorUVPlane on a random benchmark-sized plane with two bytes per
// pixel once under disable_cpu_flags_ and benchmark_iterations_ times under
// benchmark_cpu_info_, and requires the two outputs to match byte for byte.
TEST_F(LibYUVPlanarTest, MirrorUVPlane_Opt) {
  const int kStride = benchmark_width_ * 2;
  const int kSize = benchmark_width_ * benchmark_height_ * 2;
  align_buffer_page_end(src_pixels, kSize);
  align_buffer_page_end(dst_pixels_opt, kSize);
  align_buffer_page_end(dst_pixels_c, kSize);

  MemRandomize(src_pixels, kSize);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  MirrorUVPlane(src_pixels, kStride, dst_pixels_c, kStride, benchmark_width_,
                benchmark_height_);

  // Benchmarked pass.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int rep = 0; rep < benchmark_iterations_; ++rep) {
    MirrorUVPlane(src_pixels, kStride, dst_pixels_opt, kStride,
                  benchmark_width_, benchmark_height_);
  }

  for (int b = 0; b < kSize; ++b) {
    EXPECT_EQ(dst_pixels_c[b], dst_pixels_opt[b]);
  }
  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
862
// Applies ARGBShade with three different shade values to four reference
// pixels and checks the resulting bytes exactly, then runs ARGBShade over
// 1280 pixels benchmark_pixels_div1280_ times.
TEST_F(LibYUVPlanarTest, TestShade) {
  const int kWidth = 1280;
  const int kNumCases = 4;
  SIMD_ALIGNED(uint8_t src[kWidth][4]);
  SIMD_ALIGNED(uint8_t shaded[kWidth][4]);
  memset(src, 0, sizeof(src));

  static const uint8_t kInput[kNumCases][4] = {
      {10, 20, 40, 80},
      {0, 0, 0, 255},
      {0, 0, 0, 0},
      {0, 0, 0, 0},
  };
  for (int p = 0; p < kNumCases; ++p) {
    for (int c = 0; c < 4; ++c) {
      src[p][c] = kInput[p][c];
    }
  }

  // Shade value 0x80ffffff: all four pixels are checked.
  static const unsigned kExpected80ffffff[kNumCases][4] = {
      {10u, 20u, 40u, 40u},
      {0u, 0u, 0u, 128u},
      {0u, 0u, 0u, 0u},
      {0u, 0u, 0u, 0u},
  };
  // Do 8 pixels to allow opt version to be used.
  ARGBShade(&src[0][0], 0, &shaded[0][0], 0, 8, 1, 0x80ffffff);
  for (int p = 0; p < kNumCases; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpected80ffffff[p][c], shaded[p][c])
          << "pixel " << p << " channel " << c;
    }
  }

  // Shade value 0x80808080: only the first pixel is checked.
  static const unsigned kExpected80808080[4] = {5u, 10u, 20u, 40u};
  ARGBShade(&src[0][0], 0, &shaded[0][0], 0, 8, 1, 0x80808080);
  for (int c = 0; c < 4; ++c) {
    EXPECT_EQ(kExpected80808080[c], shaded[0][c]) << "channel " << c;
  }

  // Shade value 0x10204080: every byte of the first pixel must be 5.
  ARGBShade(&src[0][0], 0, &shaded[0][0], 0, 8, 1, 0x10204080);
  for (int c = 0; c < 4; ++c) {
    EXPECT_EQ(5u, shaded[0][c]) << "channel " << c;
  }

  for (int rep = 0; rep < benchmark_pixels_div1280_; ++rep) {
    ARGBShade(&src[0][0], 0, &shaded[0][0], 0, kWidth, 1, 0x80808080);
  }
}
920
// Checks ARGBInterpolate on four hand-picked pixels with interpolation values
// 128, 0 and 192, then interpolates a 1280 pixel row repeatedly for timing.
TEST_F(LibYUVPlanarTest, TestARGBInterpolate) {
  SIMD_ALIGNED(uint8_t orig_pixels_0[1280][4]);
  SIMD_ALIGNED(uint8_t orig_pixels_1[1280][4]);
  SIMD_ALIGNED(uint8_t interpolate_pixels[1280][4]);
  memset(orig_pixels_0, 0, sizeof(orig_pixels_0));
  memset(orig_pixels_1, 0, sizeof(orig_pixels_1));

  // First four pixels of each source; everything after them stays zero.
  static const uint8_t kSource0[4][4] = {
      {16u, 32u, 64u, 128u},
      {0u, 0u, 0u, 255u},
      {0u, 0u, 0u, 0u},
      {0u, 0u, 0u, 0u},
  };
  static const uint8_t kSource1[4][4] = {
      {0u, 0u, 0u, 0u},
      {0u, 0u, 0u, 0u},
      {0u, 0u, 0u, 0u},
      {255u, 255u, 255u, 255u},
  };
  for (int pixel = 0; pixel < 4; ++pixel) {
    for (int channel = 0; channel < 4; ++channel) {
      orig_pixels_0[pixel][channel] = kSource0[pixel][channel];
      orig_pixels_1[pixel][channel] = kSource1[pixel][channel];
    }
  }

  const uint8_t* const src0 = &orig_pixels_0[0][0];
  const uint8_t* const src1 = &orig_pixels_1[0][0];
  uint8_t* const dst = &interpolate_pixels[0][0];

  // Interpolation value 128: each output is expected midway between the two
  // sources (255 against 0 gives 128).
  ARGBInterpolate(src0, 0, src1, 0, dst, 0, 4, 1, 128);
  static const unsigned int kMidway[4][4] = {
      {8u, 16u, 32u, 64u},
      {0u, 0u, 0u, 128u},
      {0u, 0u, 0u, 0u},
      {128u, 128u, 128u, 128u},
  };
  for (int pixel = 0; pixel < 4; ++pixel) {
    for (int channel = 0; channel < 4; ++channel) {
      EXPECT_EQ(kMidway[pixel][channel], interpolate_pixels[pixel][channel]);
    }
  }

  // Interpolation value 0: pixel 0 must equal the first source exactly.
  ARGBInterpolate(src0, 0, src1, 0, dst, 0, 4, 1, 0);
  static const unsigned int kFirstSourceOnly[4] = {16u, 32u, 64u, 128u};
  for (int channel = 0; channel < 4; ++channel) {
    EXPECT_EQ(kFirstSourceOnly[channel], interpolate_pixels[0][channel]);
  }

  // Interpolation value 192: pixel 0 keeps a quarter of the first source.
  ARGBInterpolate(src0, 0, src1, 0, dst, 0, 4, 1, 192);
  static const unsigned int kQuarterOfFirst[4] = {4u, 8u, 16u, 32u};
  for (int channel = 0; channel < 4; ++channel) {
    EXPECT_EQ(kQuarterOfFirst[channel], interpolate_pixels[0][channel]);
  }

  // Timing loop over the whole row.
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    ARGBInterpolate(src0, 0, src1, 0, dst, 0, 1280, 1, 128);
  }
}
1001
// Checks InterpolatePlane on sixteen hand-picked bytes with interpolation
// values 128, 0 and 192, then interpolates a 1280 byte row repeatedly for
// timing.
TEST_F(LibYUVPlanarTest, TestInterpolatePlane) {
  SIMD_ALIGNED(uint8_t orig_pixels_0[1280]);
  SIMD_ALIGNED(uint8_t orig_pixels_1[1280]);
  SIMD_ALIGNED(uint8_t interpolate_pixels[1280]);
  memset(orig_pixels_0, 0, sizeof(orig_pixels_0));
  memset(orig_pixels_1, 0, sizeof(orig_pixels_1));

  // First sixteen samples of each source; everything after them stays zero.
  static const uint8_t kSource0[16] = {16u, 32u, 64u, 128u, 0u, 0u, 0u, 255u,
                                       0u,  0u,  0u,  0u,   0u, 0u, 0u, 0u};
  static const uint8_t kSource1[16] = {0u, 0u, 0u, 0u, 0u,   0u,   0u,   0u,
                                       0u, 0u, 0u, 0u, 255u, 255u, 255u, 255u};
  for (int sample = 0; sample < 16; ++sample) {
    orig_pixels_0[sample] = kSource0[sample];
    orig_pixels_1[sample] = kSource1[sample];
  }

  const uint8_t* const src0 = &orig_pixels_0[0];
  const uint8_t* const src1 = &orig_pixels_1[0];
  uint8_t* const dst = &interpolate_pixels[0];

  // Interpolation value 128: each output is expected midway between the two
  // sources (255 against 0 gives 128).
  InterpolatePlane(src0, 0, src1, 0, dst, 0, 16, 1, 128);
  static const unsigned int kMidway[16] = {
      8u, 16u, 32u, 64u, 0u,   0u,   0u,   128u,
      0u, 0u,  0u,  0u,  128u, 128u, 128u, 128u};
  for (int sample = 0; sample < 16; ++sample) {
    EXPECT_EQ(kMidway[sample], interpolate_pixels[sample]);
  }

  // Interpolation value 0: the leading samples must equal the first source.
  InterpolatePlane(src0, 0, src1, 0, dst, 0, 16, 1, 0);
  static const unsigned int kFirstSourceOnly[4] = {16u, 32u, 64u, 128u};
  for (int sample = 0; sample < 4; ++sample) {
    EXPECT_EQ(kFirstSourceOnly[sample], interpolate_pixels[sample]);
  }

  // Interpolation value 192: the leading samples keep a quarter of the first
  // source.
  InterpolatePlane(src0, 0, src1, 0, dst, 0, 16, 1, 192);
  static const unsigned int kQuarterOfFirst[4] = {4u, 8u, 16u, 32u};
  for (int sample = 0; sample < 4; ++sample) {
    EXPECT_EQ(kQuarterOfFirst[sample], interpolate_pixels[sample]);
  }

  // Timing loop over the whole row, using interpolation value 123.
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    InterpolatePlane(src0, 0, src1, 0, dst, 0, 1280, 1, 123);
  }
}
1082
// 16-bit counterpart of the InterpolatePlane check: runs InterpolatePlane_16
// on sixteen hand-picked samples with interpolation values 128, 0 and 192,
// then interpolates a 1280 sample row repeatedly for timing.
TEST_F(LibYUVPlanarTest, TestInterpolatePlane_16) {
  SIMD_ALIGNED(uint16_t orig_pixels_0[1280]);
  SIMD_ALIGNED(uint16_t orig_pixels_1[1280]);
  SIMD_ALIGNED(uint16_t interpolate_pixels[1280]);
  memset(orig_pixels_0, 0, sizeof(orig_pixels_0));
  memset(orig_pixels_1, 0, sizeof(orig_pixels_1));

  // First sixteen samples of each source; everything after them stays zero.
  static const uint16_t kSource0[16] = {16u, 32u, 64u, 128u, 0u, 0u, 0u, 255u,
                                        0u,  0u,  0u,  0u,   0u, 0u, 0u, 0u};
  static const uint16_t kSource1[16] = {0u, 0u, 0u, 0u, 0u,   0u,   0u,   0u,
                                        0u, 0u, 0u, 0u, 255u, 255u, 255u, 255u};
  for (int sample = 0; sample < 16; ++sample) {
    orig_pixels_0[sample] = kSource0[sample];
    orig_pixels_1[sample] = kSource1[sample];
  }

  const uint16_t* const src0 = &orig_pixels_0[0];
  const uint16_t* const src1 = &orig_pixels_1[0];
  uint16_t* const dst = &interpolate_pixels[0];

  // Interpolation value 128: each output is expected midway between the two
  // sources (255 against 0 gives 128).
  InterpolatePlane_16(src0, 0, src1, 0, dst, 0, 16, 1, 128);
  static const unsigned int kMidway[16] = {
      8u, 16u, 32u, 64u, 0u,   0u,   0u,   128u,
      0u, 0u,  0u,  0u,  128u, 128u, 128u, 128u};
  for (int sample = 0; sample < 16; ++sample) {
    EXPECT_EQ(kMidway[sample], interpolate_pixels[sample]);
  }

  // Interpolation value 0: the leading samples must equal the first source.
  InterpolatePlane_16(src0, 0, src1, 0, dst, 0, 16, 1, 0);
  static const unsigned int kFirstSourceOnly[4] = {16u, 32u, 64u, 128u};
  for (int sample = 0; sample < 4; ++sample) {
    EXPECT_EQ(kFirstSourceOnly[sample], interpolate_pixels[sample]);
  }

  // Interpolation value 192: the leading samples keep a quarter of the first
  // source.
  InterpolatePlane_16(src0, 0, src1, 0, dst, 0, 16, 1, 192);
  static const unsigned int kQuarterOfFirst[4] = {4u, 8u, 16u, 32u};
  for (int sample = 0; sample < 4; ++sample) {
    EXPECT_EQ(kQuarterOfFirst[sample], interpolate_pixels[sample]);
  }

  // Timing loop over the whole row, using interpolation value 123.
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    InterpolatePlane_16(src0, 0, src1, 0, dst, 0, 1280, 1, 123);
  }
}
1163
// Generates a test named ARGBInterpolate<TERP><N> that runs ARGBInterpolate
// once with CPU features masked off and benchmark_iterations_ times with the
// benchmark CPU configuration, then requires both outputs to match exactly.
//   BPP_A / STRIDE_A  bytes per pixel and stride rounding for both sources.
//   BPP_B / STRIDE_B  bytes per pixel and stride rounding for the destination.
//   W1280             width expression, in pixels.
//   TERP              interpolation value passed to ARGBInterpolate.
//   NEG               sign token placed in front of the height.
//   OFF               byte offset added to both source pointers.
// FMT_A and FMT_B are accepted but not referenced by the body.
#define TESTTERP(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, W1280, TERP, \
                 N, NEG, OFF)                                                 \
  TEST_F(LibYUVPlanarTest, ARGBInterpolate##TERP##N) {                        \
    const int kWidth = W1280;                                                 \
    const int kHeight = benchmark_height_;                                    \
    const int kStrideA =                                                      \
        (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A;                \
    const int kStrideB =                                                      \
        (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B;                \
    const int kSourceBytes = kStrideA * kHeight;                              \
    const int kDestBytes = kStrideB * kHeight;                                \
    align_buffer_page_end(src_argb_a, kSourceBytes + OFF);                    \
    align_buffer_page_end(src_argb_b, kSourceBytes + OFF);                    \
    align_buffer_page_end(dst_argb_c, kDestBytes);                            \
    align_buffer_page_end(dst_argb_opt, kDestBytes);                          \
    uint8_t* const first_src = src_argb_a + OFF;                              \
    uint8_t* const second_src = src_argb_b + OFF;                             \
    for (int n = 0; n < kSourceBytes; ++n) {                                  \
      first_src[n] = (fastrand() & 0xff);                                     \
      second_src[n] = (fastrand() & 0xff);                                    \
    }                                                                         \
    MaskCpuFlags(disable_cpu_flags_);                                         \
    ARGBInterpolate(first_src, kStrideA, second_src, kStrideA, dst_argb_c,    \
                    kStrideB, kWidth, NEG kHeight, TERP);                     \
    MaskCpuFlags(benchmark_cpu_info_);                                        \
    for (int run = 0; run < benchmark_iterations_; ++run) {                   \
      ARGBInterpolate(first_src, kStrideA, second_src, kStrideA,              \
                      dst_argb_opt, kStrideB, kWidth, NEG kHeight, TERP);     \
    }                                                                         \
    for (int n = 0; n < kDestBytes; ++n) {                                    \
      EXPECT_EQ(dst_argb_c[n], dst_argb_opt[n]);                              \
    }                                                                         \
    free_aligned_buffer_page_end(src_argb_a);                                 \
    free_aligned_buffer_page_end(src_argb_b);                                 \
    free_aligned_buffer_page_end(dst_argb_c);                                 \
    free_aligned_buffer_page_end(dst_argb_opt);                               \
  }
1197
1198 #define TESTINTERPOLATE(TERP) \
1199 TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_ + 1, TERP, _Any, +, 0) \
1200 TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Unaligned, +, 1) \
1201 TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Invert, -, 0) \
1202 TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Opt, +, 0)
1203
1204 TESTINTERPOLATE(0)
1205 TESTINTERPOLATE(64)
1206 TESTINTERPOLATE(128)
1207 TESTINTERPOLATE(192)
1208 TESTINTERPOLATE(255)
1209
TestBlend(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off,int attenuate)1210 static int TestBlend(int width,
1211 int height,
1212 int benchmark_iterations,
1213 int disable_cpu_flags,
1214 int benchmark_cpu_info,
1215 int invert,
1216 int off,
1217 int attenuate) {
1218 if (width < 1) {
1219 width = 1;
1220 }
1221 const int kBpp = 4;
1222 const int kStride = width * kBpp;
1223 align_buffer_page_end(src_argb_a, kStride * height + off);
1224 align_buffer_page_end(src_argb_b, kStride * height + off);
1225 align_buffer_page_end(dst_argb_c, kStride * height);
1226 align_buffer_page_end(dst_argb_opt, kStride * height);
1227 for (int i = 0; i < kStride * height; ++i) {
1228 src_argb_a[i + off] = (fastrand() & 0xff);
1229 src_argb_b[i + off] = (fastrand() & 0xff);
1230 }
1231 MemRandomize(src_argb_a, kStride * height + off);
1232 MemRandomize(src_argb_b, kStride * height + off);
1233 if (attenuate) {
1234 ARGBAttenuate(src_argb_a + off, kStride, src_argb_a + off, kStride, width,
1235 height);
1236 }
1237 memset(dst_argb_c, 255, kStride * height);
1238 memset(dst_argb_opt, 255, kStride * height);
1239
1240 MaskCpuFlags(disable_cpu_flags);
1241 ARGBBlend(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
1242 kStride, width, invert * height);
1243 MaskCpuFlags(benchmark_cpu_info);
1244 for (int i = 0; i < benchmark_iterations; ++i) {
1245 ARGBBlend(src_argb_a + off, kStride, src_argb_b + off, kStride,
1246 dst_argb_opt, kStride, width, invert * height);
1247 }
1248 int max_diff = 0;
1249 for (int i = 0; i < kStride * height; ++i) {
1250 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1251 static_cast<int>(dst_argb_opt[i]));
1252 if (abs_diff > max_diff) {
1253 max_diff = abs_diff;
1254 }
1255 }
1256 free_aligned_buffer_page_end(src_argb_a);
1257 free_aligned_buffer_page_end(src_argb_b);
1258 free_aligned_buffer_page_end(dst_argb_c);
1259 free_aligned_buffer_page_end(dst_argb_opt);
1260 return max_diff;
1261 }
1262
// ARGBBlend with a width one pixel larger than the benchmark width; the two
// outputs may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBBlend_Any) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int kAttenuate = 1;
  const int worst_diff = TestBlend(
      benchmark_width_ + 1, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset, kAttenuate);
  EXPECT_LE(worst_diff, 1);
}
1269
// ARGBBlend with both source pointers advanced by one byte; the two outputs
// may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBBlend_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int kAttenuate = 1;
  const int worst_diff = TestBlend(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset, kAttenuate);
  EXPECT_LE(worst_diff, 1);
}
1276
// ARGBBlend with a negated height; the two outputs may differ by at most 1
// per byte.
TEST_F(LibYUVPlanarTest, ARGBBlend_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int kAttenuate = 1;
  const int worst_diff = TestBlend(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset, kAttenuate);
  EXPECT_LE(worst_diff, 1);
}
1283
// ARGBBlend on a first source that has not been run through ARGBAttenuate;
// the two outputs may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBBlend_Unattenuated) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int kAttenuate = 0;
  const int worst_diff = TestBlend(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset, kAttenuate);
  EXPECT_LE(worst_diff, 1);
}
1290
// ARGBBlend at the benchmark size with no offset and an attenuated first
// source; the two outputs may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBBlend_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int kAttenuate = 1;
  const int worst_diff = TestBlend(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset, kAttenuate);
  EXPECT_LE(worst_diff, 1);
}
1297
TestBlendPlane(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1298 static void TestBlendPlane(int width,
1299 int height,
1300 int benchmark_iterations,
1301 int disable_cpu_flags,
1302 int benchmark_cpu_info,
1303 int invert,
1304 int off) {
1305 if (width < 1) {
1306 width = 1;
1307 }
1308 const int kBpp = 1;
1309 const int kStride = width * kBpp;
1310 align_buffer_page_end(src_argb_a, kStride * height + off);
1311 align_buffer_page_end(src_argb_b, kStride * height + off);
1312 align_buffer_page_end(src_argb_alpha, kStride * height + off);
1313 align_buffer_page_end(dst_argb_c, kStride * height + off);
1314 align_buffer_page_end(dst_argb_opt, kStride * height + off);
1315 memset(dst_argb_c, 255, kStride * height + off);
1316 memset(dst_argb_opt, 255, kStride * height + off);
1317
1318 // Test source is maintained exactly if alpha is 255.
1319 for (int i = 0; i < width; ++i) {
1320 src_argb_a[i + off] = i & 255;
1321 src_argb_b[i + off] = 255 - (i & 255);
1322 }
1323 memset(src_argb_alpha + off, 255, width);
1324 BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
1325 src_argb_alpha + off, width, dst_argb_opt + off, width, width, 1);
1326 for (int i = 0; i < width; ++i) {
1327 EXPECT_EQ(src_argb_a[i + off], dst_argb_opt[i + off]);
1328 }
1329 // Test destination is maintained exactly if alpha is 0.
1330 memset(src_argb_alpha + off, 0, width);
1331 BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
1332 src_argb_alpha + off, width, dst_argb_opt + off, width, width, 1);
1333 for (int i = 0; i < width; ++i) {
1334 EXPECT_EQ(src_argb_b[i + off], dst_argb_opt[i + off]);
1335 }
1336 for (int i = 0; i < kStride * height; ++i) {
1337 src_argb_a[i + off] = (fastrand() & 0xff);
1338 src_argb_b[i + off] = (fastrand() & 0xff);
1339 src_argb_alpha[i + off] = (fastrand() & 0xff);
1340 }
1341
1342 MaskCpuFlags(disable_cpu_flags);
1343 BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
1344 src_argb_alpha + off, width, dst_argb_c + off, width, width,
1345 invert * height);
1346 MaskCpuFlags(benchmark_cpu_info);
1347 for (int i = 0; i < benchmark_iterations; ++i) {
1348 BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
1349 src_argb_alpha + off, width, dst_argb_opt + off, width, width,
1350 invert * height);
1351 }
1352 for (int i = 0; i < kStride * height; ++i) {
1353 EXPECT_EQ(dst_argb_c[i + off], dst_argb_opt[i + off]);
1354 }
1355 free_aligned_buffer_page_end(src_argb_a);
1356 free_aligned_buffer_page_end(src_argb_b);
1357 free_aligned_buffer_page_end(src_argb_alpha);
1358 free_aligned_buffer_page_end(dst_argb_c);
1359 free_aligned_buffer_page_end(dst_argb_opt);
1360 }
1361
// BlendPlane at the benchmark size with no pointer offset.
TEST_F(LibYUVPlanarTest, BlendPlane_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
// BlendPlane at the benchmark size with every buffer advanced by one byte.
TEST_F(LibYUVPlanarTest, BlendPlane_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
// BlendPlane with a width one larger than the benchmark width and every
// buffer advanced by one byte.
TEST_F(LibYUVPlanarTest, BlendPlane_Any) {
  const int kInvert = +1;
  const int kOffset = 1;
  TestBlendPlane(benchmark_width_ + 1, benchmark_height_,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_, kInvert, kOffset);
}
// BlendPlane with a negated height and every buffer advanced by one byte.
TEST_F(LibYUVPlanarTest, BlendPlane_Invert) {
  const int kInvert = -1;
  const int kOffset = 1;
  TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
1378
// Integer division of v by a, rounded up (for non-negative v and positive a).
#define SUBSAMPLE(v, a) (((v) + (a) - 1) / (a))
1380
TestI420Blend(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1381 static void TestI420Blend(int width,
1382 int height,
1383 int benchmark_iterations,
1384 int disable_cpu_flags,
1385 int benchmark_cpu_info,
1386 int invert,
1387 int off) {
1388 width = ((width) > 0) ? (width) : 1;
1389 const int kStrideUV = SUBSAMPLE(width, 2);
1390 const int kSizeUV = kStrideUV * SUBSAMPLE(height, 2);
1391 align_buffer_page_end(src_y0, width * height + off);
1392 align_buffer_page_end(src_u0, kSizeUV + off);
1393 align_buffer_page_end(src_v0, kSizeUV + off);
1394 align_buffer_page_end(src_y1, width * height + off);
1395 align_buffer_page_end(src_u1, kSizeUV + off);
1396 align_buffer_page_end(src_v1, kSizeUV + off);
1397 align_buffer_page_end(src_a, width * height + off);
1398 align_buffer_page_end(dst_y_c, width * height + off);
1399 align_buffer_page_end(dst_u_c, kSizeUV + off);
1400 align_buffer_page_end(dst_v_c, kSizeUV + off);
1401 align_buffer_page_end(dst_y_opt, width * height + off);
1402 align_buffer_page_end(dst_u_opt, kSizeUV + off);
1403 align_buffer_page_end(dst_v_opt, kSizeUV + off);
1404
1405 MemRandomize(src_y0, width * height + off);
1406 MemRandomize(src_u0, kSizeUV + off);
1407 MemRandomize(src_v0, kSizeUV + off);
1408 MemRandomize(src_y1, width * height + off);
1409 MemRandomize(src_u1, kSizeUV + off);
1410 MemRandomize(src_v1, kSizeUV + off);
1411 MemRandomize(src_a, width * height + off);
1412 memset(dst_y_c, 255, width * height + off);
1413 memset(dst_u_c, 255, kSizeUV + off);
1414 memset(dst_v_c, 255, kSizeUV + off);
1415 memset(dst_y_opt, 255, width * height + off);
1416 memset(dst_u_opt, 255, kSizeUV + off);
1417 memset(dst_v_opt, 255, kSizeUV + off);
1418
1419 MaskCpuFlags(disable_cpu_flags);
1420 I420Blend(src_y0 + off, width, src_u0 + off, kStrideUV, src_v0 + off,
1421 kStrideUV, src_y1 + off, width, src_u1 + off, kStrideUV,
1422 src_v1 + off, kStrideUV, src_a + off, width, dst_y_c + off, width,
1423 dst_u_c + off, kStrideUV, dst_v_c + off, kStrideUV, width,
1424 invert * height);
1425 MaskCpuFlags(benchmark_cpu_info);
1426 for (int i = 0; i < benchmark_iterations; ++i) {
1427 I420Blend(src_y0 + off, width, src_u0 + off, kStrideUV, src_v0 + off,
1428 kStrideUV, src_y1 + off, width, src_u1 + off, kStrideUV,
1429 src_v1 + off, kStrideUV, src_a + off, width, dst_y_opt + off,
1430 width, dst_u_opt + off, kStrideUV, dst_v_opt + off, kStrideUV,
1431 width, invert * height);
1432 }
1433 for (int i = 0; i < width * height; ++i) {
1434 EXPECT_EQ(dst_y_c[i + off], dst_y_opt[i + off]);
1435 }
1436 for (int i = 0; i < kSizeUV; ++i) {
1437 EXPECT_EQ(dst_u_c[i + off], dst_u_opt[i + off]);
1438 EXPECT_EQ(dst_v_c[i + off], dst_v_opt[i + off]);
1439 }
1440 free_aligned_buffer_page_end(src_y0);
1441 free_aligned_buffer_page_end(src_u0);
1442 free_aligned_buffer_page_end(src_v0);
1443 free_aligned_buffer_page_end(src_y1);
1444 free_aligned_buffer_page_end(src_u1);
1445 free_aligned_buffer_page_end(src_v1);
1446 free_aligned_buffer_page_end(src_a);
1447 free_aligned_buffer_page_end(dst_y_c);
1448 free_aligned_buffer_page_end(dst_u_c);
1449 free_aligned_buffer_page_end(dst_v_c);
1450 free_aligned_buffer_page_end(dst_y_opt);
1451 free_aligned_buffer_page_end(dst_u_opt);
1452 free_aligned_buffer_page_end(dst_v_opt);
1453 }
1454
// I420Blend at the benchmark size with no pointer offset.
TEST_F(LibYUVPlanarTest, I420Blend_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
// I420Blend at the benchmark size with every plane advanced by one byte.
TEST_F(LibYUVPlanarTest, I420Blend_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
1463
1464 // TODO(fbarchard): DISABLED because _Any uses C. Avoid C and re-enable.
TEST_F(LibYUVPlanarTest,DISABLED_I420Blend_Any)1465 TEST_F(LibYUVPlanarTest, DISABLED_I420Blend_Any) {
1466 TestI420Blend(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_,
1467 disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
1468 }
// I420Blend at the benchmark size with a negated height.
TEST_F(LibYUVPlanarTest, I420Blend_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
1473
// Checks ARGBAffineRow_C against three known output pixels and, when the SSE2
// row function is both compiled in and supported by the running CPU, checks
// that ARGBAffineRow_SSE2 matches the C output byte for byte and times it.
TEST_F(LibYUVPlanarTest, TestAffine) {
  SIMD_ALIGNED(uint8_t orig_pixels_0[1280][4]);
  SIMD_ALIGNED(uint8_t interpolate_pixels_C[1280][4]);

  // Every channel of source pixel i holds i truncated to 8 bits.
  for (int i = 0; i < 1280; ++i) {
    for (int j = 0; j < 4; ++j) {
      orig_pixels_0[i][j] = static_cast<uint8_t>(i);
    }
  }

  // The expectations below match reading source pixel floor(i * 0.75) for
  // output pixel i (128 -> 96, 255 -> 191).
  float uv_step[4] = {0.f, 0.f, 0.75f, 0.f};

  ARGBAffineRow_C(&orig_pixels_0[0][0], 0, &interpolate_pixels_C[0][0], uv_step,
                  1280);
  EXPECT_EQ(0u, interpolate_pixels_C[0][0]);
  EXPECT_EQ(96u, interpolate_pixels_C[128][0]);
  EXPECT_EQ(191u, interpolate_pixels_C[255][3]);

#if defined(HAS_ARGBAFFINEROW_SSE2)
  // HAS_ARGBAFFINEROW_SSE2 only says the SSE2 row was compiled in. The CPU
  // running the test must also report SSE2 before any SSE2 code is executed,
  // so the correctness comparison sits behind the same runtime check as the
  // timing loop.
  if (TestCpuFlag(kCpuHasSSE2)) {
    SIMD_ALIGNED(uint8_t interpolate_pixels_Opt[1280][4]);
    ARGBAffineRow_SSE2(&orig_pixels_0[0][0], 0, &interpolate_pixels_Opt[0][0],
                       uv_step, 1280);
    EXPECT_EQ(0,
              memcmp(interpolate_pixels_Opt, interpolate_pixels_C, 1280 * 4));

    for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
      ARGBAffineRow_SSE2(&orig_pixels_0[0][0], 0,
                         &interpolate_pixels_Opt[0][0], uv_step, 1280);
    }
  }
#endif
}
1507
// Copies the interior of a bordered plane with CopyPlane, once per iteration
// with CPU features masked off and once per iteration with the benchmark CPU
// configuration, and requires the two destination buffers to end up
// identical over their whole allocation.
TEST_F(LibYUVPlanarTest, TestCopyPlane) {
  const int kBorder = 12;
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kPaddedWidth = kWidth + kBorder * 2;
  const int kPlaneBytes = kPaddedWidth * (kHeight + kBorder * 2);

  align_buffer_page_end(orig_y, kPlaneBytes);
  align_buffer_page_end(dst_c, kPlaneBytes);
  align_buffer_page_end(dst_opt, kPlaneBytes);

  memset(orig_y, 0, kPlaneBytes);
  memset(dst_c, 0, kPlaneBytes);
  memset(dst_opt, 0, kPlaneBytes);

  // Fill the interior of the source with random data; the border stays zero.
  for (int row = kBorder; row < kHeight + kBorder; ++row) {
    uint8_t* const line = orig_y + row * kPaddedWidth;
    for (int col = kBorder; col < kWidth + kBorder; ++col) {
      line[col] = fastrand() & 0xff;
    }
  }

  // Give both destinations the same random 7-bit contents.
  for (int n = 0; n < kPlaneBytes; ++n) {
    const uint8_t value = fastrand() & 0x7f;
    dst_c[n] = value;
    dst_opt[n] = value;
  }

  // Offset of the first interior byte, the source stride, and the
  // destination stride passed to CopyPlane.
  const int kInteriorOffset = kBorder * kPaddedWidth + kBorder;
  const int kSourceStride = kPaddedWidth;
  const int kDestStride = 8;

  // Disable all optimizations.
  MaskCpuFlags(disable_cpu_flags_);
  for (int run = 0; run < benchmark_iterations_; run++) {
    CopyPlane(orig_y + kInteriorOffset, kSourceStride,
              dst_c + kInteriorOffset, kDestStride, kWidth, kHeight);
  }

  // Enable optimizations.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int run = 0; run < benchmark_iterations_; run++) {
    CopyPlane(orig_y + kInteriorOffset, kSourceStride,
              dst_opt + kInteriorOffset, kDestStride, kWidth, kHeight);
  }

  // Count mismatching bytes instead of asserting on each one.
  int mismatches = 0;
  for (int n = 0; n < kPlaneBytes; ++n) {
    if (dst_c[n] != dst_opt[n]) {
      ++mismatches;
    }
  }

  free_aligned_buffer_page_end(orig_y);
  free_aligned_buffer_page_end(dst_c);
  free_aligned_buffer_page_end(dst_opt);

  EXPECT_EQ(0, mismatches);
}
1567
// Copies a tightly packed benchmark-sized plane with CopyPlane under both CPU
// configurations and requires identical output.
TEST_F(LibYUVPlanarTest, CopyPlane_Opt) {
  const int kStride = benchmark_width_;
  const int kPlaneBytes = benchmark_width_ * benchmark_height_;
  align_buffer_page_end(orig_y, kPlaneBytes);
  align_buffer_page_end(dst_c, kPlaneBytes);
  align_buffer_page_end(dst_opt, kPlaneBytes);

  // The destinations start with different fill bytes, so a copy that leaves
  // bytes unwritten shows up as a mismatch.
  MemRandomize(orig_y, kPlaneBytes);
  memset(dst_c, 1, kPlaneBytes);
  memset(dst_opt, 2, kPlaneBytes);

  // Disable all optimizations.
  MaskCpuFlags(disable_cpu_flags_);
  for (int run = 0; run < benchmark_iterations_; run++) {
    CopyPlane(orig_y, kStride, dst_c, kStride, benchmark_width_,
              benchmark_height_);
  }

  // Enable optimizations.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int run = 0; run < benchmark_iterations_; run++) {
    CopyPlane(orig_y, kStride, dst_opt, kStride, benchmark_width_,
              benchmark_height_);
  }

  for (int n = 0; n < kPlaneBytes; ++n) {
    EXPECT_EQ(dst_c[n], dst_opt[n]);
  }

  free_aligned_buffer_page_end(orig_y);
  free_aligned_buffer_page_end(dst_c);
  free_aligned_buffer_page_end(dst_opt);
}
1601
// Test to verify copying a rect with a zero height or width does
// not touch destination memory, under both CPU configurations.
TEST_F(LibYUVPlanarTest, TestCopyPlaneZero) {
  uint8_t src = 42;
  uint8_t dst = 0;

  // {stride, width, height}; each entry has a zero width and/or height. The
  // stride is used for both source and destination.
  static const int kEmptyRects[3][3] = {
      {0, 0, 0},
      {1, 1, 0},
      {1, 0, 1},
  };

  // First with all optimizations disabled, then with them enabled.
  const int cpu_masks[2] = {disable_cpu_flags_, benchmark_cpu_info_};
  for (int mask = 0; mask < 2; ++mask) {
    MaskCpuFlags(cpu_masks[mask]);
    for (int rect = 0; rect < 3; ++rect) {
      const int stride = kEmptyRects[rect][0];
      const int width = kEmptyRects[rect][1];
      const int height = kEmptyRects[rect][2];
      CopyPlane(&src, stride, &dst, stride, width, height);
      EXPECT_EQ(src, 42);
      EXPECT_EQ(dst, 0);
    }
  }
}
1636
// Runs DetilePlane with a tile height of 16 under both CPU configurations and
// requires identical output.
TEST_F(LibYUVPlanarTest, TestDetilePlane) {
  // The source is tiled, so its dimensions are rounded up to multiples of 16
  // to leave room for whole tiles.
  const int kTiledWidth = (benchmark_width_ + 15) & ~15;
  const int kTiledHeight = (benchmark_height_ + 15) & ~15;
  const int kTiledBytes = kTiledWidth * kTiledHeight;
  const int kOutputBytes = benchmark_width_ * benchmark_height_;
  align_buffer_page_end(orig_y, kTiledBytes);
  align_buffer_page_end(dst_c, kOutputBytes);
  align_buffer_page_end(dst_opt, kOutputBytes);

  MemRandomize(orig_y, kTiledBytes);
  memset(dst_c, 0, kOutputBytes);
  memset(dst_opt, 0, kOutputBytes);

  // Disable all optimizations.
  MaskCpuFlags(disable_cpu_flags_);
  for (int run = 0; run < benchmark_iterations_; run++) {
    DetilePlane(orig_y, kTiledWidth, dst_c, benchmark_width_, benchmark_width_,
                benchmark_height_, 16);
  }

  // Enable optimizations.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int run = 0; run < benchmark_iterations_; run++) {
    DetilePlane(orig_y, kTiledWidth, dst_opt, benchmark_width_,
                benchmark_width_, benchmark_height_, 16);
  }

  for (int n = 0; n < kOutputBytes; ++n) {
    EXPECT_EQ(dst_c[n], dst_opt[n]);
  }

  free_aligned_buffer_page_end(orig_y);
  free_aligned_buffer_page_end(dst_c);
  free_aligned_buffer_page_end(dst_opt);
}
1675
1676 // Compares DetileSplitUV to 2 step Detile + SplitUV
TEST_F(LibYUVPlanarTest,TestDetileSplitUVPlane_Correctness)1677 TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Correctness) {
1678 int i, j;
1679
1680 // orig is tiled. Allocate enough memory for tiles.
1681 int orig_width = (benchmark_width_ + 15) & ~15;
1682 int orig_height = (benchmark_height_ + 15) & ~15;
1683 int orig_plane_size = orig_width * orig_height;
1684 int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_;
1685 align_buffer_page_end(orig_uv, orig_plane_size);
1686 align_buffer_page_end(detiled_uv, orig_plane_size);
1687 align_buffer_page_end(dst_u_two_stage, uv_plane_size);
1688 align_buffer_page_end(dst_u_opt, uv_plane_size);
1689 align_buffer_page_end(dst_v_two_stage, uv_plane_size);
1690 align_buffer_page_end(dst_v_opt, uv_plane_size);
1691
1692 MemRandomize(orig_uv, orig_plane_size);
1693 memset(detiled_uv, 0, orig_plane_size);
1694 memset(dst_u_two_stage, 0, uv_plane_size);
1695 memset(dst_u_opt, 0, uv_plane_size);
1696 memset(dst_v_two_stage, 0, uv_plane_size);
1697 memset(dst_v_opt, 0, uv_plane_size);
1698
1699 DetileSplitUVPlane(orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2,
1700 dst_v_opt, (benchmark_width_ + 1) / 2, benchmark_width_,
1701 benchmark_height_, 16);
1702
1703 // Benchmark 2 step conversion for comparison.
1704 for (j = 0; j < benchmark_iterations_; j++) {
1705 DetilePlane(orig_uv, orig_width, detiled_uv, benchmark_width_,
1706 benchmark_width_, benchmark_height_, 16);
1707 SplitUVPlane(detiled_uv, orig_width, dst_u_two_stage,
1708 (benchmark_width_ + 1) / 2, dst_v_two_stage,
1709 (benchmark_width_ + 1) / 2, (benchmark_width_ + 1) / 2,
1710 benchmark_height_);
1711 }
1712
1713 for (i = 0; i < uv_plane_size; ++i) {
1714 EXPECT_EQ(dst_u_two_stage[i], dst_u_opt[i]);
1715 EXPECT_EQ(dst_v_two_stage[i], dst_v_opt[i]);
1716 }
1717
1718 free_aligned_buffer_page_end(orig_uv);
1719 free_aligned_buffer_page_end(detiled_uv);
1720 free_aligned_buffer_page_end(dst_u_two_stage);
1721 free_aligned_buffer_page_end(dst_u_opt);
1722 free_aligned_buffer_page_end(dst_v_two_stage);
1723 free_aligned_buffer_page_end(dst_v_opt);
1724 }
1725
// Runs DetileSplitUVPlane once with CPU flags masked by disable_cpu_flags_ and
// benchmark_iterations_ times with benchmark_cpu_info_, then requires the U
// and V outputs of the two configurations to be identical.
TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) {
  // orig is tiled. Allocate enough memory for tiles.
  const int kTiledWidth = (benchmark_width_ + 15) & ~15;
  const int kTiledHeight = (benchmark_height_ + 15) & ~15;
  const int kTiledSize = kTiledWidth * kTiledHeight;
  const int kUVWidth = (benchmark_width_ + 1) / 2;
  const int kUVSize = kUVWidth * benchmark_height_;
  align_buffer_page_end(orig_uv, kTiledSize);
  align_buffer_page_end(dst_u_c, kUVSize);
  align_buffer_page_end(dst_u_opt, kUVSize);
  align_buffer_page_end(dst_v_c, kUVSize);
  align_buffer_page_end(dst_v_opt, kUVSize);

  // Random tiled input, zeroed outputs.
  MemRandomize(orig_uv, kTiledSize);
  memset(dst_u_c, 0, kUVSize);
  memset(dst_u_opt, 0, kUVSize);
  memset(dst_v_c, 0, kUVSize);
  memset(dst_v_opt, 0, kUVSize);

  // Disable all optimizations.
  MaskCpuFlags(disable_cpu_flags_);
  DetileSplitUVPlane(orig_uv, kTiledWidth, dst_u_c, kUVWidth, dst_v_c,
                     kUVWidth, benchmark_width_, benchmark_height_, 16);

  // Enable optimizations.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    DetileSplitUVPlane(orig_uv, kTiledWidth, dst_u_opt, kUVWidth, dst_v_opt,
                       kUVWidth, benchmark_width_, benchmark_height_, 16);
  }

  for (int i = 0; i < kUVSize; ++i) {
    EXPECT_EQ(dst_u_c[i], dst_u_opt[i]);
    EXPECT_EQ(dst_v_c[i], dst_v_opt[i]);
  }

  free_aligned_buffer_page_end(orig_uv);
  free_aligned_buffer_page_end(dst_u_c);
  free_aligned_buffer_page_end(dst_u_opt);
  free_aligned_buffer_page_end(dst_v_c);
  free_aligned_buffer_page_end(dst_v_opt);
}
1773
TestMultiply(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1774 static int TestMultiply(int width,
1775 int height,
1776 int benchmark_iterations,
1777 int disable_cpu_flags,
1778 int benchmark_cpu_info,
1779 int invert,
1780 int off) {
1781 if (width < 1) {
1782 width = 1;
1783 }
1784 const int kBpp = 4;
1785 const int kStride = width * kBpp;
1786 align_buffer_page_end(src_argb_a, kStride * height + off);
1787 align_buffer_page_end(src_argb_b, kStride * height + off);
1788 align_buffer_page_end(dst_argb_c, kStride * height);
1789 align_buffer_page_end(dst_argb_opt, kStride * height);
1790 for (int i = 0; i < kStride * height; ++i) {
1791 src_argb_a[i + off] = (fastrand() & 0xff);
1792 src_argb_b[i + off] = (fastrand() & 0xff);
1793 }
1794 memset(dst_argb_c, 0, kStride * height);
1795 memset(dst_argb_opt, 0, kStride * height);
1796
1797 MaskCpuFlags(disable_cpu_flags);
1798 ARGBMultiply(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
1799 kStride, width, invert * height);
1800 MaskCpuFlags(benchmark_cpu_info);
1801 for (int i = 0; i < benchmark_iterations; ++i) {
1802 ARGBMultiply(src_argb_a + off, kStride, src_argb_b + off, kStride,
1803 dst_argb_opt, kStride, width, invert * height);
1804 }
1805 int max_diff = 0;
1806 for (int i = 0; i < kStride * height; ++i) {
1807 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1808 static_cast<int>(dst_argb_opt[i]));
1809 if (abs_diff > max_diff) {
1810 max_diff = abs_diff;
1811 }
1812 }
1813 free_aligned_buffer_page_end(src_argb_a);
1814 free_aligned_buffer_page_end(src_argb_b);
1815 free_aligned_buffer_page_end(dst_argb_c);
1816 free_aligned_buffer_page_end(dst_argb_opt);
1817 return max_diff;
1818 }
1819
// ARGBMultiply at benchmark width + 1, positive height, no source offset.
// The two CPU-flag configurations may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBMultiply_Any) {
  const int kInvert = +1;
  const int kOff = 0;
  const int max_diff = TestMultiply(
      benchmark_width_ + 1, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
1826
// ARGBMultiply at benchmark size with both source pointers offset by one
// byte. The two CPU-flag configurations may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBMultiply_Unaligned) {
  const int kInvert = +1;
  const int kOff = 1;
  const int max_diff = TestMultiply(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
1833
// ARGBMultiply at benchmark size with a negated height passed to the
// function. The two CPU-flag configurations may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBMultiply_Invert) {
  const int kInvert = -1;
  const int kOff = 0;
  const int max_diff = TestMultiply(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
1840
// ARGBMultiply at benchmark size, positive height, no source offset.
// The two CPU-flag configurations may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBMultiply_Opt) {
  const int kInvert = +1;
  const int kOff = 0;
  const int max_diff = TestMultiply(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
1847
TestAdd(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1848 static int TestAdd(int width,
1849 int height,
1850 int benchmark_iterations,
1851 int disable_cpu_flags,
1852 int benchmark_cpu_info,
1853 int invert,
1854 int off) {
1855 if (width < 1) {
1856 width = 1;
1857 }
1858 const int kBpp = 4;
1859 const int kStride = width * kBpp;
1860 align_buffer_page_end(src_argb_a, kStride * height + off);
1861 align_buffer_page_end(src_argb_b, kStride * height + off);
1862 align_buffer_page_end(dst_argb_c, kStride * height);
1863 align_buffer_page_end(dst_argb_opt, kStride * height);
1864 for (int i = 0; i < kStride * height; ++i) {
1865 src_argb_a[i + off] = (fastrand() & 0xff);
1866 src_argb_b[i + off] = (fastrand() & 0xff);
1867 }
1868 memset(dst_argb_c, 0, kStride * height);
1869 memset(dst_argb_opt, 0, kStride * height);
1870
1871 MaskCpuFlags(disable_cpu_flags);
1872 ARGBAdd(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
1873 kStride, width, invert * height);
1874 MaskCpuFlags(benchmark_cpu_info);
1875 for (int i = 0; i < benchmark_iterations; ++i) {
1876 ARGBAdd(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_opt,
1877 kStride, width, invert * height);
1878 }
1879 int max_diff = 0;
1880 for (int i = 0; i < kStride * height; ++i) {
1881 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1882 static_cast<int>(dst_argb_opt[i]));
1883 if (abs_diff > max_diff) {
1884 max_diff = abs_diff;
1885 }
1886 }
1887 free_aligned_buffer_page_end(src_argb_a);
1888 free_aligned_buffer_page_end(src_argb_b);
1889 free_aligned_buffer_page_end(dst_argb_c);
1890 free_aligned_buffer_page_end(dst_argb_opt);
1891 return max_diff;
1892 }
1893
// ARGBAdd at benchmark width + 1, positive height, no source offset.
// The two CPU-flag configurations may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBAdd_Any) {
  const int kInvert = +1;
  const int kOff = 0;
  const int max_diff =
      TestAdd(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_,
              disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
1900
// ARGBAdd at benchmark size with both source pointers offset by one byte.
// The two CPU-flag configurations may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBAdd_Unaligned) {
  const int kInvert = +1;
  const int kOff = 1;
  const int max_diff =
      TestAdd(benchmark_width_, benchmark_height_, benchmark_iterations_,
              disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
1907
// ARGBAdd at benchmark size with a negated height passed to the function.
// The two CPU-flag configurations may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBAdd_Invert) {
  const int kInvert = -1;
  const int kOff = 0;
  const int max_diff =
      TestAdd(benchmark_width_, benchmark_height_, benchmark_iterations_,
              disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
1914
// ARGBAdd at benchmark size, positive height, no source offset.
// The two CPU-flag configurations may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBAdd_Opt) {
  const int kInvert = +1;
  const int kOff = 0;
  const int max_diff =
      TestAdd(benchmark_width_, benchmark_height_, benchmark_iterations_,
              disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
1921
TestSubtract(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1922 static int TestSubtract(int width,
1923 int height,
1924 int benchmark_iterations,
1925 int disable_cpu_flags,
1926 int benchmark_cpu_info,
1927 int invert,
1928 int off) {
1929 if (width < 1) {
1930 width = 1;
1931 }
1932 const int kBpp = 4;
1933 const int kStride = width * kBpp;
1934 align_buffer_page_end(src_argb_a, kStride * height + off);
1935 align_buffer_page_end(src_argb_b, kStride * height + off);
1936 align_buffer_page_end(dst_argb_c, kStride * height);
1937 align_buffer_page_end(dst_argb_opt, kStride * height);
1938 for (int i = 0; i < kStride * height; ++i) {
1939 src_argb_a[i + off] = (fastrand() & 0xff);
1940 src_argb_b[i + off] = (fastrand() & 0xff);
1941 }
1942 memset(dst_argb_c, 0, kStride * height);
1943 memset(dst_argb_opt, 0, kStride * height);
1944
1945 MaskCpuFlags(disable_cpu_flags);
1946 ARGBSubtract(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
1947 kStride, width, invert * height);
1948 MaskCpuFlags(benchmark_cpu_info);
1949 for (int i = 0; i < benchmark_iterations; ++i) {
1950 ARGBSubtract(src_argb_a + off, kStride, src_argb_b + off, kStride,
1951 dst_argb_opt, kStride, width, invert * height);
1952 }
1953 int max_diff = 0;
1954 for (int i = 0; i < kStride * height; ++i) {
1955 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1956 static_cast<int>(dst_argb_opt[i]));
1957 if (abs_diff > max_diff) {
1958 max_diff = abs_diff;
1959 }
1960 }
1961 free_aligned_buffer_page_end(src_argb_a);
1962 free_aligned_buffer_page_end(src_argb_b);
1963 free_aligned_buffer_page_end(dst_argb_c);
1964 free_aligned_buffer_page_end(dst_argb_opt);
1965 return max_diff;
1966 }
1967
// ARGBSubtract at benchmark width + 1, positive height, no source offset.
// The two CPU-flag configurations may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBSubtract_Any) {
  const int kInvert = +1;
  const int kOff = 0;
  const int max_diff = TestSubtract(
      benchmark_width_ + 1, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
1974
// ARGBSubtract at benchmark size with both source pointers offset by one
// byte. The two CPU-flag configurations may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBSubtract_Unaligned) {
  const int kInvert = +1;
  const int kOff = 1;
  const int max_diff = TestSubtract(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
1981
// ARGBSubtract at benchmark size with a negated height passed to the
// function. The two CPU-flag configurations may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBSubtract_Invert) {
  const int kInvert = -1;
  const int kOff = 0;
  const int max_diff = TestSubtract(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
1988
// ARGBSubtract at benchmark size, positive height, no source offset.
// The two CPU-flag configurations may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBSubtract_Opt) {
  const int kInvert = +1;
  const int kOff = 0;
  const int max_diff = TestSubtract(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
1995
TestSobel(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1996 static int TestSobel(int width,
1997 int height,
1998 int benchmark_iterations,
1999 int disable_cpu_flags,
2000 int benchmark_cpu_info,
2001 int invert,
2002 int off) {
2003 if (width < 1) {
2004 width = 1;
2005 }
2006 const int kBpp = 4;
2007 const int kStride = width * kBpp;
2008 align_buffer_page_end(src_argb_a, kStride * height + off);
2009 align_buffer_page_end(dst_argb_c, kStride * height);
2010 align_buffer_page_end(dst_argb_opt, kStride * height);
2011 memset(src_argb_a, 0, kStride * height + off);
2012 for (int i = 0; i < kStride * height; ++i) {
2013 src_argb_a[i + off] = (fastrand() & 0xff);
2014 }
2015 memset(dst_argb_c, 0, kStride * height);
2016 memset(dst_argb_opt, 0, kStride * height);
2017
2018 MaskCpuFlags(disable_cpu_flags);
2019 ARGBSobel(src_argb_a + off, kStride, dst_argb_c, kStride, width,
2020 invert * height);
2021 MaskCpuFlags(benchmark_cpu_info);
2022 for (int i = 0; i < benchmark_iterations; ++i) {
2023 ARGBSobel(src_argb_a + off, kStride, dst_argb_opt, kStride, width,
2024 invert * height);
2025 }
2026 int max_diff = 0;
2027 for (int i = 0; i < kStride * height; ++i) {
2028 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
2029 static_cast<int>(dst_argb_opt[i]));
2030 if (abs_diff > max_diff) {
2031 max_diff = abs_diff;
2032 }
2033 }
2034 free_aligned_buffer_page_end(src_argb_a);
2035 free_aligned_buffer_page_end(dst_argb_c);
2036 free_aligned_buffer_page_end(dst_argb_opt);
2037 return max_diff;
2038 }
2039
// ARGBSobel at benchmark width + 1, positive height, no source offset.
// Both CPU-flag configurations must produce identical output.
TEST_F(LibYUVPlanarTest, ARGBSobel_Any) {
  const int kInvert = +1;
  const int kOff = 0;
  const int max_diff =
      TestSobel(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
2046
// ARGBSobel at benchmark size with the source pointer offset by one byte.
// Both CPU-flag configurations must produce identical output.
TEST_F(LibYUVPlanarTest, ARGBSobel_Unaligned) {
  const int kInvert = +1;
  const int kOff = 1;
  const int max_diff =
      TestSobel(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
2053
// ARGBSobel at benchmark size with a negated height passed to the function.
// Both CPU-flag configurations must produce identical output.
TEST_F(LibYUVPlanarTest, ARGBSobel_Invert) {
  const int kInvert = -1;
  const int kOff = 0;
  const int max_diff =
      TestSobel(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
2060
// ARGBSobel at benchmark size, positive height, no source offset.
// Both CPU-flag configurations must produce identical output.
TEST_F(LibYUVPlanarTest, ARGBSobel_Opt) {
  const int kInvert = +1;
  const int kOff = 0;
  const int max_diff =
      TestSobel(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
2067
TestSobelToPlane(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)2068 static int TestSobelToPlane(int width,
2069 int height,
2070 int benchmark_iterations,
2071 int disable_cpu_flags,
2072 int benchmark_cpu_info,
2073 int invert,
2074 int off) {
2075 if (width < 1) {
2076 width = 1;
2077 }
2078 const int kSrcBpp = 4;
2079 const int kDstBpp = 1;
2080 const int kSrcStride = (width * kSrcBpp + 15) & ~15;
2081 const int kDstStride = (width * kDstBpp + 15) & ~15;
2082 align_buffer_page_end(src_argb_a, kSrcStride * height + off);
2083 align_buffer_page_end(dst_argb_c, kDstStride * height);
2084 align_buffer_page_end(dst_argb_opt, kDstStride * height);
2085 memset(src_argb_a, 0, kSrcStride * height + off);
2086 for (int i = 0; i < kSrcStride * height; ++i) {
2087 src_argb_a[i + off] = (fastrand() & 0xff);
2088 }
2089 memset(dst_argb_c, 0, kDstStride * height);
2090 memset(dst_argb_opt, 0, kDstStride * height);
2091
2092 MaskCpuFlags(disable_cpu_flags);
2093 ARGBSobelToPlane(src_argb_a + off, kSrcStride, dst_argb_c, kDstStride, width,
2094 invert * height);
2095 MaskCpuFlags(benchmark_cpu_info);
2096 for (int i = 0; i < benchmark_iterations; ++i) {
2097 ARGBSobelToPlane(src_argb_a + off, kSrcStride, dst_argb_opt, kDstStride,
2098 width, invert * height);
2099 }
2100 int max_diff = 0;
2101 for (int i = 0; i < kDstStride * height; ++i) {
2102 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
2103 static_cast<int>(dst_argb_opt[i]));
2104 if (abs_diff > max_diff) {
2105 max_diff = abs_diff;
2106 }
2107 }
2108 free_aligned_buffer_page_end(src_argb_a);
2109 free_aligned_buffer_page_end(dst_argb_c);
2110 free_aligned_buffer_page_end(dst_argb_opt);
2111 return max_diff;
2112 }
2113
// ARGBSobelToPlane at benchmark width + 1, positive height, no source offset.
// Both CPU-flag configurations must produce identical output.
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Any) {
  const int kInvert = +1;
  const int kOff = 0;
  const int max_diff = TestSobelToPlane(
      benchmark_width_ + 1, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
2120
// ARGBSobelToPlane at benchmark size with the source pointer offset by one
// byte. Both CPU-flag configurations must produce identical output.
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Unaligned) {
  const int kInvert = +1;
  const int kOff = 1;
  const int max_diff = TestSobelToPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
2127
// ARGBSobelToPlane at benchmark size with a negated height passed to the
// function. Both CPU-flag configurations must produce identical output.
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Invert) {
  const int kInvert = -1;
  const int kOff = 0;
  const int max_diff = TestSobelToPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
2134
// ARGBSobelToPlane at benchmark size, positive height, no source offset.
// Both CPU-flag configurations must produce identical output.
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Opt) {
  const int kInvert = +1;
  const int kOff = 0;
  const int max_diff = TestSobelToPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
2141
TestSobelXY(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)2142 static int TestSobelXY(int width,
2143 int height,
2144 int benchmark_iterations,
2145 int disable_cpu_flags,
2146 int benchmark_cpu_info,
2147 int invert,
2148 int off) {
2149 if (width < 1) {
2150 width = 1;
2151 }
2152 const int kBpp = 4;
2153 const int kStride = width * kBpp;
2154 align_buffer_page_end(src_argb_a, kStride * height + off);
2155 align_buffer_page_end(dst_argb_c, kStride * height);
2156 align_buffer_page_end(dst_argb_opt, kStride * height);
2157 memset(src_argb_a, 0, kStride * height + off);
2158 for (int i = 0; i < kStride * height; ++i) {
2159 src_argb_a[i + off] = (fastrand() & 0xff);
2160 }
2161 memset(dst_argb_c, 0, kStride * height);
2162 memset(dst_argb_opt, 0, kStride * height);
2163
2164 MaskCpuFlags(disable_cpu_flags);
2165 ARGBSobelXY(src_argb_a + off, kStride, dst_argb_c, kStride, width,
2166 invert * height);
2167 MaskCpuFlags(benchmark_cpu_info);
2168 for (int i = 0; i < benchmark_iterations; ++i) {
2169 ARGBSobelXY(src_argb_a + off, kStride, dst_argb_opt, kStride, width,
2170 invert * height);
2171 }
2172 int max_diff = 0;
2173 for (int i = 0; i < kStride * height; ++i) {
2174 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
2175 static_cast<int>(dst_argb_opt[i]));
2176 if (abs_diff > max_diff) {
2177 max_diff = abs_diff;
2178 }
2179 }
2180 free_aligned_buffer_page_end(src_argb_a);
2181 free_aligned_buffer_page_end(dst_argb_c);
2182 free_aligned_buffer_page_end(dst_argb_opt);
2183 return max_diff;
2184 }
2185
// ARGBSobelXY at benchmark width + 1, positive height, no source offset.
// Both CPU-flag configurations must produce identical output.
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Any) {
  const int kInvert = +1;
  const int kOff = 0;
  const int max_diff = TestSobelXY(
      benchmark_width_ + 1, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
2192
// ARGBSobelXY at benchmark size with the source pointer offset by one byte.
// Both CPU-flag configurations must produce identical output.
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Unaligned) {
  const int kInvert = +1;
  const int kOff = 1;
  const int max_diff = TestSobelXY(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
2199
// ARGBSobelXY at benchmark size with a negated height passed to the function.
// Both CPU-flag configurations must produce identical output.
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Invert) {
  const int kInvert = -1;
  const int kOff = 0;
  const int max_diff = TestSobelXY(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
2206
// ARGBSobelXY at benchmark size, positive height, no source offset.
// Both CPU-flag configurations must produce identical output.
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Opt) {
  const int kInvert = +1;
  const int kOff = 0;
  const int max_diff = TestSobelXY(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
2213
TestBlur(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off,int radius)2214 static int TestBlur(int width,
2215 int height,
2216 int benchmark_iterations,
2217 int disable_cpu_flags,
2218 int benchmark_cpu_info,
2219 int invert,
2220 int off,
2221 int radius) {
2222 if (width < 1) {
2223 width = 1;
2224 }
2225 const int kBpp = 4;
2226 const int kStride = width * kBpp;
2227 align_buffer_page_end(src_argb_a, kStride * height + off);
2228 align_buffer_page_end(dst_cumsum, width * height * 16);
2229 align_buffer_page_end(dst_argb_c, kStride * height);
2230 align_buffer_page_end(dst_argb_opt, kStride * height);
2231 for (int i = 0; i < kStride * height; ++i) {
2232 src_argb_a[i + off] = (fastrand() & 0xff);
2233 }
2234 memset(dst_cumsum, 0, width * height * 16);
2235 memset(dst_argb_c, 0, kStride * height);
2236 memset(dst_argb_opt, 0, kStride * height);
2237
2238 MaskCpuFlags(disable_cpu_flags);
2239 ARGBBlur(src_argb_a + off, kStride, dst_argb_c, kStride,
2240 reinterpret_cast<int32_t*>(dst_cumsum), width * 4, width,
2241 invert * height, radius);
2242 MaskCpuFlags(benchmark_cpu_info);
2243 for (int i = 0; i < benchmark_iterations; ++i) {
2244 ARGBBlur(src_argb_a + off, kStride, dst_argb_opt, kStride,
2245 reinterpret_cast<int32_t*>(dst_cumsum), width * 4, width,
2246 invert * height, radius);
2247 }
2248 int max_diff = 0;
2249 for (int i = 0; i < kStride * height; ++i) {
2250 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
2251 static_cast<int>(dst_argb_opt[i]));
2252 if (abs_diff > max_diff) {
2253 max_diff = abs_diff;
2254 }
2255 }
2256 free_aligned_buffer_page_end(src_argb_a);
2257 free_aligned_buffer_page_end(dst_cumsum);
2258 free_aligned_buffer_page_end(dst_argb_c);
2259 free_aligned_buffer_page_end(dst_argb_opt);
2260 return max_diff;
2261 }
2262
// DISABLED_ARM(name) wraps a test name. It expands to DISABLED_<name> only
// when DISABLE_SLOW_TESTS is defined and the target is neither x86_64 nor
// i386; in every other configuration it expands to the name unchanged.
#if defined(DISABLE_SLOW_TESTS) && !defined(__x86_64__) && !defined(__i386__)
#define DISABLED_ARM(name) DISABLED_##name
#else
#define DISABLED_ARM(name) name
#endif
2268
// Radius passed to TestBlur by the ARGBBlur_* tests.
static const int kBlurSize = 55;

// ARGBBlur with radius kBlurSize at benchmark width + 1, positive height, no
// source offset. The two CPU-flag configurations may differ by at most 1.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Any)) {
  const int kInvert = +1;
  const int kOff = 0;
  const int max_diff = TestBlur(
      benchmark_width_ + 1, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff, kBlurSize);
  EXPECT_LE(max_diff, 1);
}
2276
// ARGBBlur with radius kBlurSize at benchmark size, source pointer offset by
// one byte. The two CPU-flag configurations may differ by at most 1.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Unaligned)) {
  const int kInvert = +1;
  const int kOff = 1;
  const int max_diff = TestBlur(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff, kBlurSize);
  EXPECT_LE(max_diff, 1);
}
2283
// ARGBBlur with radius kBlurSize at benchmark size, negated height passed to
// the function. The two CPU-flag configurations may differ by at most 1.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Invert)) {
  const int kInvert = -1;
  const int kOff = 0;
  const int max_diff = TestBlur(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff, kBlurSize);
  EXPECT_LE(max_diff, 1);
}
2290
// ARGBBlur with radius kBlurSize at benchmark size, positive height, no
// source offset. The two CPU-flag configurations may differ by at most 1.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Opt)) {
  const int kInvert = +1;
  const int kOff = 0;
  const int max_diff = TestBlur(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff, kBlurSize);
  EXPECT_LE(max_diff, 1);
}
2297
// Radius passed to TestBlur by the ARGBBlurSmall_* tests.
static const int kBlurSmallSize = 5;

// ARGBBlur with radius kBlurSmallSize at benchmark width + 1, positive
// height, no source offset. The two CPU-flag configurations may differ by at
// most 1.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Any)) {
  const int kInvert = +1;
  const int kOff = 0;
  const int max_diff = TestBlur(
      benchmark_width_ + 1, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff, kBlurSmallSize);
  EXPECT_LE(max_diff, 1);
}
2305
// ARGBBlur with radius kBlurSmallSize at benchmark size, source pointer
// offset by one byte. The two CPU-flag configurations may differ by at most 1.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Unaligned)) {
  const int kInvert = +1;
  const int kOff = 1;
  const int max_diff = TestBlur(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff, kBlurSmallSize);
  EXPECT_LE(max_diff, 1);
}
2312
// ARGBBlur with radius kBlurSmallSize at benchmark size, negated height
// passed to the function. The two CPU-flag configurations may differ by at
// most 1.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Invert)) {
  const int kInvert = -1;
  const int kOff = 0;
  const int max_diff = TestBlur(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff, kBlurSmallSize);
  EXPECT_LE(max_diff, 1);
}
2319
// ARGBBlur with radius kBlurSmallSize at benchmark size, positive height, no
// source offset. The two CPU-flag configurations may differ by at most 1.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Opt)) {
  const int kInvert = +1;
  const int kOff = 0;
  const int max_diff = TestBlur(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff, kBlurSmallSize);
  EXPECT_LE(max_diff, 1);
}
2326
// Checks ARGBPolynomial in two ways: five hand-picked pixels are compared
// against fixed expected outputs, then a 1280-pixel ramp is processed with
// CPU flags masked by disable_cpu_flags_ and again with benchmark_cpu_info_,
// and the two results must match exactly.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(TestARGBPolynomial)) {
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]);
  SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  SIMD_ALIGNED(static const float kWarmifyPolynomial[16]) = {
      0.94230f,  -3.03300f,    -2.92500f,    0.f,  // C0
      0.584500f, 1.112000f,    1.535000f,    1.f,  // C1 x
      0.001313f, -0.002503f,   -0.004496f,   0.f,  // C2 x * x
      0.0f,      0.000006965f, 0.000008781f, 0.f,  // C3 x * x * x
  };

  // Hand-picked input pixels, one row of four bytes per pixel.
  static const uint8_t kTestPixels[5][4] = {
      {255u, 0u, 0u, 128u},      // Test blue
      {0u, 255u, 0u, 0u},        // Test green
      {0u, 0u, 255u, 255u},      // Test red
      {255u, 255u, 255u, 255u},  // Test white
      {16u, 64u, 192u, 224u},    // Test color
  };
  for (int pixel = 0; pixel < 5; ++pixel) {
    for (int channel = 0; channel < 4; ++channel) {
      orig_pixels[pixel][channel] = kTestPixels[pixel][channel];
    }
  }

  // Do 16 to test asm version.
  ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
                 &kWarmifyPolynomial[0], 16, 1);
  // Blue.
  EXPECT_EQ(235u, dst_pixels_opt[0][0]);
  EXPECT_EQ(0u, dst_pixels_opt[0][1]);
  EXPECT_EQ(0u, dst_pixels_opt[0][2]);
  EXPECT_EQ(128u, dst_pixels_opt[0][3]);
  // Green.
  EXPECT_EQ(0u, dst_pixels_opt[1][0]);
  EXPECT_EQ(233u, dst_pixels_opt[1][1]);
  EXPECT_EQ(0u, dst_pixels_opt[1][2]);
  EXPECT_EQ(0u, dst_pixels_opt[1][3]);
  // Red.
  EXPECT_EQ(0u, dst_pixels_opt[2][0]);
  EXPECT_EQ(0u, dst_pixels_opt[2][1]);
  EXPECT_EQ(241u, dst_pixels_opt[2][2]);
  EXPECT_EQ(255u, dst_pixels_opt[2][3]);
  // White.
  EXPECT_EQ(235u, dst_pixels_opt[3][0]);
  EXPECT_EQ(233u, dst_pixels_opt[3][1]);
  EXPECT_EQ(241u, dst_pixels_opt[3][2]);
  EXPECT_EQ(255u, dst_pixels_opt[3][3]);
  // Color.
  EXPECT_EQ(10u, dst_pixels_opt[4][0]);
  EXPECT_EQ(59u, dst_pixels_opt[4][1]);
  EXPECT_EQ(188u, dst_pixels_opt[4][2]);
  EXPECT_EQ(224u, dst_pixels_opt[4][3]);

  // Ramp input; each value is truncated to a byte on store.
  for (int i = 0; i < 1280; ++i) {
    uint8_t* const pixel = orig_pixels[i];
    pixel[0] = static_cast<uint8_t>(i);
    pixel[1] = static_cast<uint8_t>(i / 2);
    pixel[2] = static_cast<uint8_t>(i / 3);
    pixel[3] = static_cast<uint8_t>(i);
  }

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0,
                 &kWarmifyPolynomial[0], 1280, 1);

  // Benchmarked pass.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int pass = 0; pass < benchmark_pixels_div1280_; ++pass) {
    ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
                   &kWarmifyPolynomial[0], 1280, 1);
  }

  for (int i = 0; i < 1280; ++i) {
    EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]);
    EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]);
    EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]);
    EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]);
  }
}
2413
TestHalfFloatPlane(int benchmark_width,int benchmark_height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,float scale,int mask)2414 int TestHalfFloatPlane(int benchmark_width,
2415 int benchmark_height,
2416 int benchmark_iterations,
2417 int disable_cpu_flags,
2418 int benchmark_cpu_info,
2419 float scale,
2420 int mask) {
2421 int i, j;
2422 const int y_plane_size = benchmark_width * benchmark_height * 2;
2423
2424 align_buffer_page_end(orig_y, y_plane_size * 3);
2425 uint8_t* dst_opt = orig_y + y_plane_size;
2426 uint8_t* dst_c = orig_y + y_plane_size * 2;
2427
2428 MemRandomize(orig_y, y_plane_size);
2429 memset(dst_c, 0, y_plane_size);
2430 memset(dst_opt, 1, y_plane_size);
2431
2432 for (i = 0; i < y_plane_size / 2; ++i) {
2433 reinterpret_cast<uint16_t*>(orig_y)[i] &= mask;
2434 }
2435
2436 // Disable all optimizations.
2437 MaskCpuFlags(disable_cpu_flags);
2438 for (j = 0; j < benchmark_iterations; j++) {
2439 HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y), benchmark_width * 2,
2440 reinterpret_cast<uint16_t*>(dst_c), benchmark_width * 2,
2441 scale, benchmark_width, benchmark_height);
2442 }
2443
2444 // Enable optimizations.
2445 MaskCpuFlags(benchmark_cpu_info);
2446 for (j = 0; j < benchmark_iterations; j++) {
2447 HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y), benchmark_width * 2,
2448 reinterpret_cast<uint16_t*>(dst_opt), benchmark_width * 2,
2449 scale, benchmark_width, benchmark_height);
2450 }
2451
2452 int max_diff = 0;
2453 for (i = 0; i < y_plane_size / 2; ++i) {
2454 int abs_diff =
2455 abs(static_cast<int>(reinterpret_cast<uint16_t*>(dst_c)[i]) -
2456 static_cast<int>(reinterpret_cast<uint16_t*>(dst_opt)[i]));
2457 if (abs_diff > max_diff) {
2458 max_diff = abs_diff;
2459 }
2460 }
2461
2462 free_aligned_buffer_page_end(orig_y);
2463 return max_diff;
2464 }
2465
#ifdef __arm__
// Reads FPSCR into a register, sets bit 24 (0x1000000) and writes the value
// back. Per the function name this enables flush-denormal-to-zero mode.
// Only compiled when __arm__ is defined.
static void EnableFlushDenormalToZero(void) {
  uint32_t fpscr_value;
  __asm__ __volatile__(
      "vmrs %0, fpscr \n"
      "orr %0, %0, #0x1000000 \n"
      "vmsr fpscr, %0 \n"
      : "=r"(fpscr_value)
      :
      : "memory");
}
#endif  // __arm__
2476
// 5 bit exponent with bias of 15 will underflow to a denormal if scale causes
// exponent to be less than 0. 15 - log2(65536) = -1. This shouldn't normally
// happen since scale is 1/(1<<bits) where bits is 9, 10 or 12.
2480
// HalfFloatPlane with scale 1/65536 on full 16-bit inputs (mask 65535).
// Both CPU-flag configurations must produce identical output.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_denormal) {
// 32 bit arm rounding on denormal case is off by 1 compared to C.
#if defined(__arm__)
  EnableFlushDenormalToZero();
#endif
  const float kScale = 1.0f / 65536.0f;
  const int kMask = 65535;
  int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
                                benchmark_iterations_, disable_cpu_flags_,
                                benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, diff);
}
2491
// HalfFloatPlane with scale 1.0 on full 16-bit inputs (mask 65535).
// The two CPU-flag configurations may differ by at most 1.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_One) {
  const float kScale = 1.0f;
  const int kMask = 65535;
  const int diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_LE(diff, 1);
}
2498
// HalfFloatPlane with scale 1/4096 on full 16-bit inputs (mask 65535).
// Both CPU-flag configurations must produce identical output.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_Opt) {
  const float kScale = 1.0f / 4096.0f;
  const int kMask = 65535;
  const int diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, diff);
}
2505
// HalfFloatPlane with scale 1/1024 on inputs masked to 10 bits (mask 1023).
// Both CPU-flag configurations must produce identical output.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_Opt) {
  const float kScale = 1.0f / 1024.0f;
  const int kMask = 1023;
  const int diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, diff);
}
2512
// HalfFloatPlane with scale 1/512 on inputs masked to 9 bits (mask 511).
// Both CPU-flag configurations must produce identical output.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_9bit_Opt) {
  const float kScale = 1.0f / 512.0f;
  const int kMask = 511;
  const int diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, diff);
}
2519
// HalfFloatPlane with scale 1/4096 on inputs masked to 12 bits (mask 4095).
// Both CPU-flag configurations must produce identical output.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Opt) {
  const float kScale = 1.0f / 4096.0f;
  const int kMask = 4095;
  const int diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, diff);
}
2526
// HalfFloatPlane with scale 1/4095 (not a power of two) on inputs masked to
// 12 bits (mask 4095). Both CPU-flag configurations must produce identical
// output.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Offby1) {
  const float kScale = 1.0f / 4095.0f;
  const int kMask = 4095;
  const int diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, diff);
}
2533
// HalfFloatPlane with scale 1.0 on inputs masked to 11 bits (mask 2047).
// Both CPU-flag configurations must produce identical output.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_One) {
  const float kScale = 1.0f;
  const int kMask = 2047;
  const int diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, diff);
}
2540
// Runs TestHalfFloatPlane() with a scale of 1.0 and 4095 as its final
// argument (presumably a sample mask -- confirm against the helper defined
// earlier in this file) and accepts a returned difference of at most 1.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_12bit_One) {
  const float kScale = 1.0f;
  const int kMask = 4095;
  const int result = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_LE(result, 1);
}
2547
TestByteToFloat(int benchmark_width,int benchmark_height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,float scale)2548 float TestByteToFloat(int benchmark_width,
2549 int benchmark_height,
2550 int benchmark_iterations,
2551 int disable_cpu_flags,
2552 int benchmark_cpu_info,
2553 float scale) {
2554 int i, j;
2555 const int y_plane_size = benchmark_width * benchmark_height;
2556
2557 align_buffer_page_end(orig_y, y_plane_size * (1 + 4 + 4));
2558 float* dst_opt = reinterpret_cast<float*>(orig_y + y_plane_size);
2559 float* dst_c = reinterpret_cast<float*>(orig_y + y_plane_size * 5);
2560
2561 MemRandomize(orig_y, y_plane_size);
2562 memset(dst_c, 0, y_plane_size * 4);
2563 memset(dst_opt, 1, y_plane_size * 4);
2564
2565 // Disable all optimizations.
2566 MaskCpuFlags(disable_cpu_flags);
2567 ByteToFloat(orig_y, dst_c, scale, y_plane_size);
2568
2569 // Enable optimizations.
2570 MaskCpuFlags(benchmark_cpu_info);
2571 for (j = 0; j < benchmark_iterations; j++) {
2572 ByteToFloat(orig_y, dst_opt, scale, y_plane_size);
2573 }
2574
2575 float max_diff = 0;
2576 for (i = 0; i < y_plane_size; ++i) {
2577 float abs_diff = fabs(dst_c[i] - dst_opt[i]);
2578 if (abs_diff > max_diff) {
2579 max_diff = abs_diff;
2580 }
2581 }
2582
2583 free_aligned_buffer_page_end(orig_y);
2584 return max_diff;
2585 }
2586
// Runs the TestByteToFloat() helper at a scale of 1.0 and requires the
// returned maximum difference to be exactly zero.
TEST_F(LibYUVPlanarTest, TestByteToFloat) {
  const float kScale = 1.0f;
  const float max_diff = TestByteToFloat(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale);
  EXPECT_EQ(0.f, max_diff);
}
2593
// Exercises ARGBLumaColorTable() in two stages: four hand-picked pixels are
// checked against fixed expected values, then a 1280 pixel ramp is converted
// with CPU features masked by disable_cpu_flags_ and again with
// benchmark_cpu_info_, and the two outputs must match byte for byte.
TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) {
  const int kNumPixels = 1280;
  const int kTableSize = 32768;
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]);
  SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  // Table entry n holds the low 8 bits of 3 * n.
  align_buffer_page_end(lumacolortable, 32768);
  for (int entry = 0; entry < kTableSize; ++entry) {
    lumacolortable[entry] = static_cast<uint8_t>(entry * 3);
  }

  // Hand-picked input pixels, 4 bytes each.
  static const uint8_t kSamplePixels[4][4] = {
      {255, 0, 0, 128},    // Test blue
      {0, 255, 0, 0},      // Test green
      {0, 0, 255, 255},    // Test red
      {16, 64, 192, 224},  // Test color
  };
  // Output expected for each of the pixels above.
  static const uint8_t kExpectedPixels[4][4] = {
      {253, 0, 0, 128},
      {0, 253, 0, 0},
      {0, 0, 253, 255},
      {48, 192, 64, 224},
  };
  memcpy(orig_pixels, kSamplePixels, sizeof(kSamplePixels));

  // Do 16 to test asm version.
  ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
                     &lumacolortable[0], 16, 1);
  for (int px = 0; px < 4; ++px) {
    for (int ch = 0; ch < 4; ++ch) {
      EXPECT_EQ(static_cast<unsigned int>(kExpectedPixels[px][ch]),
                dst_pixels_opt[px][ch]);
    }
  }

  // Ramp input; each value is truncated to 8 bits on store.
  for (int px = 0; px < kNumPixels; ++px) {
    orig_pixels[px][0] = static_cast<uint8_t>(px);
    orig_pixels[px][1] = static_cast<uint8_t>(px / 2);
    orig_pixels[px][2] = static_cast<uint8_t>(px / 3);
    orig_pixels[px][3] = static_cast<uint8_t>(px);
  }

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0,
                     lumacolortable, 1280, 1);

  // Benchmarked pass.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int pass = 0; pass < benchmark_pixels_div1280_; ++pass) {
    ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
                       lumacolortable, 1280, 1);
  }

  for (int px = 0; px < kNumPixels; ++px) {
    for (int ch = 0; ch < 4; ++ch) {
      EXPECT_EQ(dst_pixels_c[px][ch], dst_pixels_opt[px][ch]);
    }
  }

  free_aligned_buffer_page_end(lumacolortable);
}
2671
// Compares ARGBCopyAlpha() run with CPU features masked by disable_cpu_flags_
// against the same call run with benchmark_cpu_info_. Both destinations start
// from identical random contents, so any bytes the function does not write
// still compare equal.
TEST_F(LibYUVPlanarTest, TestARGBCopyAlpha) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kStride = kWidth * 4;
  const int kSize = benchmark_width_ * benchmark_height_ * 4;
  align_buffer_page_end(orig_pixels, kSize);
  align_buffer_page_end(dst_pixels_opt, kSize);
  align_buffer_page_end(dst_pixels_c, kSize);

  MemRandomize(orig_pixels, kSize);
  MemRandomize(dst_pixels_opt, kSize);
  memcpy(dst_pixels_c, dst_pixels_opt, kSize);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBCopyAlpha(orig_pixels, kStride, dst_pixels_c, kStride, kWidth, kHeight);

  // Benchmarked pass.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    ARGBCopyAlpha(orig_pixels, kStride, dst_pixels_opt, kStride, kWidth,
                  kHeight);
  }

  for (int n = 0; n < kSize; ++n) {
    EXPECT_EQ(dst_pixels_c[n], dst_pixels_opt[n]);
  }

  free_aligned_buffer_page_end(dst_pixels_c);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(orig_pixels);
}
2699
// Compares ARGBExtractAlpha() run with CPU features masked by
// disable_cpu_flags_ against the same call run with benchmark_cpu_info_.
// The source has 4 bytes per pixel and the destination 1 byte per pixel.
TEST_F(LibYUVPlanarTest, TestARGBExtractAlpha) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kPixels = kWidth * kHeight;
  const int kSrcStride = kWidth * 4;
  align_buffer_page_end(src_pixels, kPixels * 4);
  align_buffer_page_end(dst_pixels_opt, kPixels);
  align_buffer_page_end(dst_pixels_c, kPixels);

  MemRandomize(src_pixels, kPixels * 4);
  MemRandomize(dst_pixels_opt, kPixels);
  // Both destinations start out identical.
  memcpy(dst_pixels_c, dst_pixels_opt, kPixels);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBExtractAlpha(src_pixels, kSrcStride, dst_pixels_c, kWidth, kWidth,
                   kHeight);

  // Benchmarked pass.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    ARGBExtractAlpha(src_pixels, kSrcStride, dst_pixels_opt, kWidth, kWidth,
                     kHeight);
  }

  for (int n = 0; n < kPixels; ++n) {
    EXPECT_EQ(dst_pixels_c[n], dst_pixels_opt[n]);
  }

  free_aligned_buffer_page_end(dst_pixels_c);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(src_pixels);
}
2727
// Compares ARGBCopyYToAlpha() run with CPU features masked by
// disable_cpu_flags_ against the same call run with benchmark_cpu_info_.
// The source has 1 byte per pixel and the destination 4 bytes per pixel; both
// destinations start from identical random contents.
TEST_F(LibYUVPlanarTest, TestARGBCopyYToAlpha) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kPixels = kWidth * kHeight;
  const int kDstStride = kWidth * 4;
  align_buffer_page_end(orig_pixels, kPixels);
  align_buffer_page_end(dst_pixels_opt, kPixels * 4);
  align_buffer_page_end(dst_pixels_c, kPixels * 4);

  MemRandomize(orig_pixels, kPixels);
  MemRandomize(dst_pixels_opt, kPixels * 4);
  memcpy(dst_pixels_c, dst_pixels_opt, kPixels * 4);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBCopyYToAlpha(orig_pixels, kWidth, dst_pixels_c, kDstStride, kWidth,
                   kHeight);

  // Benchmarked pass.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    ARGBCopyYToAlpha(orig_pixels, kWidth, dst_pixels_opt, kDstStride, kWidth,
                     kHeight);
  }

  for (int n = 0; n < kPixels * 4; ++n) {
    EXPECT_EQ(dst_pixels_c[n], dst_pixels_opt[n]);
  }

  free_aligned_buffer_page_end(dst_pixels_c);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(orig_pixels);
}
2755
TestARGBRect(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off,int bpp)2756 static int TestARGBRect(int width,
2757 int height,
2758 int benchmark_iterations,
2759 int disable_cpu_flags,
2760 int benchmark_cpu_info,
2761 int invert,
2762 int off,
2763 int bpp) {
2764 if (width < 1) {
2765 width = 1;
2766 }
2767 const int kStride = width * bpp;
2768 const int kSize = kStride * height;
2769 const uint32_t v32 = fastrand() & (bpp == 4 ? 0xffffffff : 0xff);
2770
2771 align_buffer_page_end(dst_argb_c, kSize + off);
2772 align_buffer_page_end(dst_argb_opt, kSize + off);
2773
2774 MemRandomize(dst_argb_c + off, kSize);
2775 memcpy(dst_argb_opt + off, dst_argb_c + off, kSize);
2776
2777 MaskCpuFlags(disable_cpu_flags);
2778 if (bpp == 4) {
2779 ARGBRect(dst_argb_c + off, kStride, 0, 0, width, invert * height, v32);
2780 } else {
2781 SetPlane(dst_argb_c + off, kStride, width, invert * height, v32);
2782 }
2783
2784 MaskCpuFlags(benchmark_cpu_info);
2785 for (int i = 0; i < benchmark_iterations; ++i) {
2786 if (bpp == 4) {
2787 ARGBRect(dst_argb_opt + off, kStride, 0, 0, width, invert * height, v32);
2788 } else {
2789 SetPlane(dst_argb_opt + off, kStride, width, invert * height, v32);
2790 }
2791 }
2792 int max_diff = 0;
2793 for (int i = 0; i < kStride * height; ++i) {
2794 int abs_diff = abs(static_cast<int>(dst_argb_c[i + off]) -
2795 static_cast<int>(dst_argb_opt[i + off]));
2796 if (abs_diff > max_diff) {
2797 max_diff = abs_diff;
2798 }
2799 }
2800 free_aligned_buffer_page_end(dst_argb_c);
2801 free_aligned_buffer_page_end(dst_argb_opt);
2802 return max_diff;
2803 }
2804
// 4 byte-per-pixel fill with the width increased by one pixel; the two
// passes of TestARGBRect() must agree exactly.
TEST_F(LibYUVPlanarTest, ARGBRect_Any) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int kBpp = 4;
  EXPECT_EQ(0, TestARGBRect(benchmark_width_ + 1, benchmark_height_,
                            benchmark_iterations_, disable_cpu_flags_,
                            benchmark_cpu_info_, kInvert, kOffset, kBpp));
}
2811
// 4 byte-per-pixel fill with the destination pointers shifted by one byte;
// the two passes of TestARGBRect() must agree exactly.
TEST_F(LibYUVPlanarTest, ARGBRect_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int kBpp = 4;
  EXPECT_EQ(0, TestARGBRect(benchmark_width_, benchmark_height_,
                            benchmark_iterations_, disable_cpu_flags_,
                            benchmark_cpu_info_, kInvert, kOffset, kBpp));
}
2818
// 4 byte-per-pixel fill with invert = -1, so TestARGBRect() hands a negated
// height to the fill function; the two passes must agree exactly.
TEST_F(LibYUVPlanarTest, ARGBRect_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int kBpp = 4;
  EXPECT_EQ(0, TestARGBRect(benchmark_width_, benchmark_height_,
                            benchmark_iterations_, disable_cpu_flags_,
                            benchmark_cpu_info_, kInvert, kOffset, kBpp));
}
2825
// Baseline 4 byte-per-pixel fill (no offset, no inversion); the two passes
// of TestARGBRect() must agree exactly.
TEST_F(LibYUVPlanarTest, ARGBRect_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int kBpp = 4;
  EXPECT_EQ(0, TestARGBRect(benchmark_width_, benchmark_height_,
                            benchmark_iterations_, disable_cpu_flags_,
                            benchmark_cpu_info_, kInvert, kOffset, kBpp));
}
2832
// 1 byte-per-pixel fill (the SetPlane() branch of TestARGBRect()) with the
// width increased by one pixel; the two passes must agree exactly.
TEST_F(LibYUVPlanarTest, SetPlane_Any) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int kBpp = 1;
  EXPECT_EQ(0, TestARGBRect(benchmark_width_ + 1, benchmark_height_,
                            benchmark_iterations_, disable_cpu_flags_,
                            benchmark_cpu_info_, kInvert, kOffset, kBpp));
}
2839
// 1 byte-per-pixel fill (the SetPlane() branch of TestARGBRect()) with the
// destination pointers shifted by one byte; the two passes must agree exactly.
TEST_F(LibYUVPlanarTest, SetPlane_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int kBpp = 1;
  EXPECT_EQ(0, TestARGBRect(benchmark_width_, benchmark_height_,
                            benchmark_iterations_, disable_cpu_flags_,
                            benchmark_cpu_info_, kInvert, kOffset, kBpp));
}
2846
// 1 byte-per-pixel fill (the SetPlane() branch of TestARGBRect()) with
// invert = -1, so a negated height reaches the fill function; the two passes
// must agree exactly.
TEST_F(LibYUVPlanarTest, SetPlane_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int kBpp = 1;
  EXPECT_EQ(0, TestARGBRect(benchmark_width_, benchmark_height_,
                            benchmark_iterations_, disable_cpu_flags_,
                            benchmark_cpu_info_, kInvert, kOffset, kBpp));
}
2853
// Baseline 1 byte-per-pixel fill (the SetPlane() branch of TestARGBRect(),
// no offset, no inversion); the two passes must agree exactly.
TEST_F(LibYUVPlanarTest, SetPlane_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int kBpp = 1;
  EXPECT_EQ(0, TestARGBRect(benchmark_width_, benchmark_height_,
                            benchmark_iterations_, disable_cpu_flags_,
                            benchmark_cpu_info_, kInvert, kOffset, kBpp));
}
2860
// Compares MergeUVPlane() run with CPU features masked by disable_cpu_flags_
// against the same call run with benchmark_cpu_info_. Two 1 byte-per-pixel
// source planes are merged into a 2 byte-per-pixel destination.
TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kPixels = kWidth * kHeight;
  const int kDstStride = kWidth * 2;
  align_buffer_page_end(src_pixels_u, kPixels);
  align_buffer_page_end(src_pixels_v, kPixels);
  align_buffer_page_end(dst_pixels_opt, kPixels * 2);
  align_buffer_page_end(dst_pixels_c, kPixels * 2);

  MemRandomize(src_pixels_u, kPixels);
  MemRandomize(src_pixels_v, kPixels);
  MemRandomize(dst_pixels_opt, kPixels * 2);
  MemRandomize(dst_pixels_c, kPixels * 2);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  MergeUVPlane(src_pixels_u, kWidth, src_pixels_v, kWidth, dst_pixels_c,
               kDstStride, kWidth, kHeight);

  // Benchmarked pass.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    MergeUVPlane(src_pixels_u, kWidth, src_pixels_v, kWidth, dst_pixels_opt,
                 kDstStride, kWidth, kHeight);
  }

  for (int n = 0; n < kPixels * 2; ++n) {
    EXPECT_EQ(dst_pixels_c[n], dst_pixels_opt[n]);
  }

  free_aligned_buffer_page_end(src_pixels_u);
  free_aligned_buffer_page_end(src_pixels_v);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
2894
2895 // 16 bit channel split and merge
TEST_F(LibYUVPlanarTest, MergeUVPlane_16_Opt) {
  // Compares MergeUVPlane_16() run with CPU features masked by
  // disable_cpu_flags_ against the same call run with benchmark_cpu_info_.
  // Buffers are sized in bytes; every sample is a 2 byte uint16_t.
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kPixels = kWidth * kHeight;
  const int kPlaneBytes = kPixels * 2;
  const int kMergedBytes = kPixels * 2 * 2;
  // Final argument of MergeUVPlane_16(); presumably the sample bit depth --
  // confirm against its declaration.
  const int kDepth = 12;
  align_buffer_page_end(src_pixels_u, kPlaneBytes);
  align_buffer_page_end(src_pixels_v, kPlaneBytes);
  align_buffer_page_end(dst_pixels_opt, kMergedBytes);
  align_buffer_page_end(dst_pixels_c, kMergedBytes);
  MemRandomize(src_pixels_u, kPlaneBytes);
  MemRandomize(src_pixels_v, kPlaneBytes);
  MemRandomize(dst_pixels_opt, kMergedBytes);
  MemRandomize(dst_pixels_c, kMergedBytes);

  // 16 bit views of the byte buffers.
  const uint16_t* src_u16 = reinterpret_cast<const uint16_t*>(src_pixels_u);
  const uint16_t* src_v16 = reinterpret_cast<const uint16_t*>(src_pixels_v);
  uint16_t* dst_c16 = reinterpret_cast<uint16_t*>(dst_pixels_c);
  uint16_t* dst_opt16 = reinterpret_cast<uint16_t*>(dst_pixels_opt);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  MergeUVPlane_16(src_u16, kWidth, src_v16, kWidth, dst_c16, kWidth * 2,
                  kWidth, kHeight, kDepth);

  // Benchmarked pass.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    MergeUVPlane_16(src_u16, kWidth, src_v16, kWidth, dst_opt16, kWidth * 2,
                    kWidth, kHeight, kDepth);
  }

  // Results are compared byte by byte.
  for (int n = 0; n < kMergedBytes; ++n) {
    EXPECT_EQ(dst_pixels_c[n], dst_pixels_opt[n]);
  }
  free_aligned_buffer_page_end(src_pixels_u);
  free_aligned_buffer_page_end(src_pixels_v);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
2929
// Compares SplitUVPlane() run with CPU features masked by disable_cpu_flags_
// against the same call run with benchmark_cpu_info_. A 2 byte-per-pixel
// source is split into two 1 byte-per-pixel planes.
TEST_F(LibYUVPlanarTest, SplitUVPlane_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kPixels = kWidth * kHeight;
  const int kSrcStride = kWidth * 2;
  align_buffer_page_end(src_pixels, kPixels * 2);
  align_buffer_page_end(dst_pixels_u_c, kPixels);
  align_buffer_page_end(dst_pixels_v_c, kPixels);
  align_buffer_page_end(dst_pixels_u_opt, kPixels);
  align_buffer_page_end(dst_pixels_v_opt, kPixels);

  MemRandomize(src_pixels, kPixels * 2);
  MemRandomize(dst_pixels_u_c, kPixels);
  MemRandomize(dst_pixels_v_c, kPixels);
  MemRandomize(dst_pixels_u_opt, kPixels);
  MemRandomize(dst_pixels_v_opt, kPixels);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  SplitUVPlane(src_pixels, kSrcStride, dst_pixels_u_c, kWidth, dst_pixels_v_c,
               kWidth, kWidth, kHeight);

  // Benchmarked pass.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    SplitUVPlane(src_pixels, kSrcStride, dst_pixels_u_opt, kWidth,
                 dst_pixels_v_opt, kWidth, kWidth, kHeight);
  }

  for (int n = 0; n < kPixels; ++n) {
    EXPECT_EQ(dst_pixels_u_c[n], dst_pixels_u_opt[n]);
    EXPECT_EQ(dst_pixels_v_c[n], dst_pixels_v_opt[n]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(dst_pixels_u_c);
  free_aligned_buffer_page_end(dst_pixels_v_c);
  free_aligned_buffer_page_end(dst_pixels_u_opt);
  free_aligned_buffer_page_end(dst_pixels_v_opt);
}
2967
2968 // 16 bit channel split
TEST_F(LibYUVPlanarTest, SplitUVPlane_16_Opt) {
  // Compares SplitUVPlane_16() run with CPU features masked by
  // disable_cpu_flags_ against the same call run with benchmark_cpu_info_.
  // Buffers are sized in bytes; every sample is a 2 byte uint16_t.
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kPixels = kWidth * kHeight;
  const int kPlaneBytes = kPixels * 2;
  const int kInterleavedBytes = kPixels * 2 * 2;
  // Final argument of SplitUVPlane_16(); presumably the sample bit depth --
  // confirm against its declaration.
  const int kDepth = 10;
  align_buffer_page_end(src_pixels, kInterleavedBytes);
  align_buffer_page_end(dst_pixels_u_c, kPlaneBytes);
  align_buffer_page_end(dst_pixels_v_c, kPlaneBytes);
  align_buffer_page_end(dst_pixels_u_opt, kPlaneBytes);
  align_buffer_page_end(dst_pixels_v_opt, kPlaneBytes);
  MemRandomize(src_pixels, kInterleavedBytes);
  MemRandomize(dst_pixels_u_c, kPlaneBytes);
  MemRandomize(dst_pixels_v_c, kPlaneBytes);
  MemRandomize(dst_pixels_u_opt, kPlaneBytes);
  MemRandomize(dst_pixels_v_opt, kPlaneBytes);

  // 16 bit views of the byte buffers.
  const uint16_t* src16 = reinterpret_cast<const uint16_t*>(src_pixels);
  uint16_t* dst_u_c16 = reinterpret_cast<uint16_t*>(dst_pixels_u_c);
  uint16_t* dst_v_c16 = reinterpret_cast<uint16_t*>(dst_pixels_v_c);
  uint16_t* dst_u_opt16 = reinterpret_cast<uint16_t*>(dst_pixels_u_opt);
  uint16_t* dst_v_opt16 = reinterpret_cast<uint16_t*>(dst_pixels_v_opt);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  SplitUVPlane_16(src16, kWidth * 2, dst_u_c16, kWidth, dst_v_c16, kWidth,
                  kWidth, kHeight, kDepth);

  // Benchmarked pass.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    SplitUVPlane_16(src16, kWidth * 2, dst_u_opt16, kWidth, dst_v_opt16,
                    kWidth, kWidth, kHeight, kDepth);
  }

  // Results are compared byte by byte.
  for (int n = 0; n < kPlaneBytes; ++n) {
    EXPECT_EQ(dst_pixels_u_c[n], dst_pixels_u_opt[n]);
    EXPECT_EQ(dst_pixels_v_c[n], dst_pixels_v_opt[n]);
  }
  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(dst_pixels_u_c);
  free_aligned_buffer_page_end(dst_pixels_v_c);
  free_aligned_buffer_page_end(dst_pixels_u_opt);
  free_aligned_buffer_page_end(dst_pixels_v_opt);
}
3006
// Compares SwapUVPlane() run with CPU features masked by disable_cpu_flags_
// against the same call run with benchmark_cpu_info_. Source and destination
// both use 2 bytes per pixel.
TEST_F(LibYUVPlanarTest, SwapUVPlane_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  // Pixel count is used as is (no rounding).
  const int kPixels = kWidth * kHeight;
  const int kStride = kWidth * 2;
  align_buffer_page_end(src_pixels, kPixels * 2);
  align_buffer_page_end(dst_pixels_opt, kPixels * 2);
  align_buffer_page_end(dst_pixels_c, kPixels * 2);

  MemRandomize(src_pixels, kPixels * 2);
  MemRandomize(dst_pixels_opt, kPixels * 2);
  MemRandomize(dst_pixels_c, kPixels * 2);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  SwapUVPlane(src_pixels, kStride, dst_pixels_c, kStride, kWidth, kHeight);

  // Benchmarked pass.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    SwapUVPlane(src_pixels, kStride, dst_pixels_opt, kStride, kWidth, kHeight);
  }

  for (int n = 0; n < kPixels * 2; ++n) {
    EXPECT_EQ(dst_pixels_c[n], dst_pixels_opt[n]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3036
// Benchmarks MergeRGBPlane(). A 3 byte-per-pixel source is split into three
// planes and merged back, once with CPU features masked by disable_cpu_flags_
// and once with benchmark_cpu_info_ (the merge being the repeated step); the
// two merged images must match.
TEST_F(LibYUVPlanarTest, MergeRGBPlane_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  // Pixel count is used as is (no rounding).
  const int kPixels = kWidth * kHeight;
  const int kPackedStride = kWidth * 3;
  align_buffer_page_end(src_pixels, kPixels * 3);
  align_buffer_page_end(tmp_pixels_r, kPixels);
  align_buffer_page_end(tmp_pixels_g, kPixels);
  align_buffer_page_end(tmp_pixels_b, kPixels);
  align_buffer_page_end(dst_pixels_opt, kPixels * 3);
  align_buffer_page_end(dst_pixels_c, kPixels * 3);

  MemRandomize(src_pixels, kPixels * 3);
  MemRandomize(tmp_pixels_r, kPixels);
  MemRandomize(tmp_pixels_g, kPixels);
  MemRandomize(tmp_pixels_b, kPixels);
  MemRandomize(dst_pixels_opt, kPixels * 3);
  MemRandomize(dst_pixels_c, kPixels * 3);

  // Reference round trip: split, then merge into dst_pixels_c.
  MaskCpuFlags(disable_cpu_flags_);
  SplitRGBPlane(src_pixels, kPackedStride, tmp_pixels_r, kWidth, tmp_pixels_g,
                kWidth, tmp_pixels_b, kWidth, kWidth, kHeight);
  MergeRGBPlane(tmp_pixels_r, kWidth, tmp_pixels_g, kWidth, tmp_pixels_b,
                kWidth, dst_pixels_c, kPackedStride, kWidth, kHeight);

  // Benchmarked round trip: split once, merge repeatedly.
  MaskCpuFlags(benchmark_cpu_info_);
  SplitRGBPlane(src_pixels, kPackedStride, tmp_pixels_r, kWidth, tmp_pixels_g,
                kWidth, tmp_pixels_b, kWidth, kWidth, kHeight);
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    MergeRGBPlane(tmp_pixels_r, kWidth, tmp_pixels_g, kWidth, tmp_pixels_b,
                  kWidth, dst_pixels_opt, kPackedStride, kWidth, kHeight);
  }

  for (int n = 0; n < kPixels * 3; ++n) {
    EXPECT_EQ(dst_pixels_c[n], dst_pixels_opt[n]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(tmp_pixels_r);
  free_aligned_buffer_page_end(tmp_pixels_g);
  free_aligned_buffer_page_end(tmp_pixels_b);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3085
// Benchmarks SplitRGBPlane(). A 3 byte-per-pixel source is split into three
// planes and merged back, once with CPU features masked by disable_cpu_flags_
// and once with benchmark_cpu_info_ (the split being the repeated step); the
// two merged images must match.
TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  // Pixel count is used as is (no rounding).
  const int kPixels = kWidth * kHeight;
  const int kPackedStride = kWidth * 3;
  align_buffer_page_end(src_pixels, kPixels * 3);
  align_buffer_page_end(tmp_pixels_r, kPixels);
  align_buffer_page_end(tmp_pixels_g, kPixels);
  align_buffer_page_end(tmp_pixels_b, kPixels);
  align_buffer_page_end(dst_pixels_opt, kPixels * 3);
  align_buffer_page_end(dst_pixels_c, kPixels * 3);

  MemRandomize(src_pixels, kPixels * 3);
  MemRandomize(tmp_pixels_r, kPixels);
  MemRandomize(tmp_pixels_g, kPixels);
  MemRandomize(tmp_pixels_b, kPixels);
  MemRandomize(dst_pixels_opt, kPixels * 3);
  MemRandomize(dst_pixels_c, kPixels * 3);

  // Reference round trip: split, then merge into dst_pixels_c.
  MaskCpuFlags(disable_cpu_flags_);
  SplitRGBPlane(src_pixels, kPackedStride, tmp_pixels_r, kWidth, tmp_pixels_g,
                kWidth, tmp_pixels_b, kWidth, kWidth, kHeight);
  MergeRGBPlane(tmp_pixels_r, kWidth, tmp_pixels_g, kWidth, tmp_pixels_b,
                kWidth, dst_pixels_c, kPackedStride, kWidth, kHeight);

  // Benchmarked round trip: split repeatedly, merge once.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    SplitRGBPlane(src_pixels, kPackedStride, tmp_pixels_r, kWidth,
                  tmp_pixels_g, kWidth, tmp_pixels_b, kWidth, kWidth, kHeight);
  }
  MergeRGBPlane(tmp_pixels_r, kWidth, tmp_pixels_g, kWidth, tmp_pixels_b,
                kWidth, dst_pixels_opt, kPackedStride, kWidth, kHeight);

  for (int n = 0; n < kPixels * 3; ++n) {
    EXPECT_EQ(dst_pixels_c[n], dst_pixels_opt[n]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(tmp_pixels_r);
  free_aligned_buffer_page_end(tmp_pixels_g);
  free_aligned_buffer_page_end(tmp_pixels_b);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3133
// Benchmarks MergeARGBPlane() with all four planes present. A 4
// byte-per-pixel source is split into four planes and merged back, once with
// CPU features masked by disable_cpu_flags_ and once with benchmark_cpu_info_
// (the merge being the repeated step); the two merged images must match.
TEST_F(LibYUVPlanarTest, MergeARGBPlane_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kPixels = kWidth * kHeight;
  const int kPackedStride = kWidth * 4;
  align_buffer_page_end(src_pixels, kPixels * 4);
  align_buffer_page_end(tmp_pixels_r, kPixels);
  align_buffer_page_end(tmp_pixels_g, kPixels);
  align_buffer_page_end(tmp_pixels_b, kPixels);
  align_buffer_page_end(tmp_pixels_a, kPixels);
  align_buffer_page_end(dst_pixels_opt, kPixels * 4);
  align_buffer_page_end(dst_pixels_c, kPixels * 4);

  MemRandomize(src_pixels, kPixels * 4);
  MemRandomize(tmp_pixels_r, kPixels);
  MemRandomize(tmp_pixels_g, kPixels);
  MemRandomize(tmp_pixels_b, kPixels);
  MemRandomize(tmp_pixels_a, kPixels);
  MemRandomize(dst_pixels_opt, kPixels * 4);
  MemRandomize(dst_pixels_c, kPixels * 4);

  // Reference round trip: split, then merge into dst_pixels_c.
  MaskCpuFlags(disable_cpu_flags_);
  SplitARGBPlane(src_pixels, kPackedStride, tmp_pixels_r, kWidth, tmp_pixels_g,
                 kWidth, tmp_pixels_b, kWidth, tmp_pixels_a, kWidth, kWidth,
                 kHeight);
  MergeARGBPlane(tmp_pixels_r, kWidth, tmp_pixels_g, kWidth, tmp_pixels_b,
                 kWidth, tmp_pixels_a, kWidth, dst_pixels_c, kPackedStride,
                 kWidth, kHeight);

  // Benchmarked round trip: split once, merge repeatedly.
  MaskCpuFlags(benchmark_cpu_info_);
  SplitARGBPlane(src_pixels, kPackedStride, tmp_pixels_r, kWidth, tmp_pixels_g,
                 kWidth, tmp_pixels_b, kWidth, tmp_pixels_a, kWidth, kWidth,
                 kHeight);
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    MergeARGBPlane(tmp_pixels_r, kWidth, tmp_pixels_g, kWidth, tmp_pixels_b,
                   kWidth, tmp_pixels_a, kWidth, dst_pixels_opt, kPackedStride,
                   kWidth, kHeight);
  }

  for (int n = 0; n < kPixels * 4; ++n) {
    EXPECT_EQ(dst_pixels_c[n], dst_pixels_opt[n]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(tmp_pixels_r);
  free_aligned_buffer_page_end(tmp_pixels_g);
  free_aligned_buffer_page_end(tmp_pixels_b);
  free_aligned_buffer_page_end(tmp_pixels_a);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3187
// Benchmarks SplitARGBPlane() with all four planes present. A 4
// byte-per-pixel source is split into four planes and merged back, once with
// CPU features masked by disable_cpu_flags_ and once with benchmark_cpu_info_
// (the split being the repeated step); the two merged images must match.
TEST_F(LibYUVPlanarTest, SplitARGBPlane_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kPixels = kWidth * kHeight;
  const int kPackedStride = kWidth * 4;
  align_buffer_page_end(src_pixels, kPixels * 4);
  align_buffer_page_end(tmp_pixels_r, kPixels);
  align_buffer_page_end(tmp_pixels_g, kPixels);
  align_buffer_page_end(tmp_pixels_b, kPixels);
  align_buffer_page_end(tmp_pixels_a, kPixels);
  align_buffer_page_end(dst_pixels_opt, kPixels * 4);
  align_buffer_page_end(dst_pixels_c, kPixels * 4);

  MemRandomize(src_pixels, kPixels * 4);
  MemRandomize(tmp_pixels_r, kPixels);
  MemRandomize(tmp_pixels_g, kPixels);
  MemRandomize(tmp_pixels_b, kPixels);
  MemRandomize(tmp_pixels_a, kPixels);
  MemRandomize(dst_pixels_opt, kPixels * 4);
  MemRandomize(dst_pixels_c, kPixels * 4);

  // Reference round trip: split, then merge into dst_pixels_c.
  MaskCpuFlags(disable_cpu_flags_);
  SplitARGBPlane(src_pixels, kPackedStride, tmp_pixels_r, kWidth, tmp_pixels_g,
                 kWidth, tmp_pixels_b, kWidth, tmp_pixels_a, kWidth, kWidth,
                 kHeight);
  MergeARGBPlane(tmp_pixels_r, kWidth, tmp_pixels_g, kWidth, tmp_pixels_b,
                 kWidth, tmp_pixels_a, kWidth, dst_pixels_c, kPackedStride,
                 kWidth, kHeight);

  // Benchmarked round trip: split repeatedly, merge once.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    SplitARGBPlane(src_pixels, kPackedStride, tmp_pixels_r, kWidth,
                   tmp_pixels_g, kWidth, tmp_pixels_b, kWidth, tmp_pixels_a,
                   kWidth, kWidth, kHeight);
  }
  MergeARGBPlane(tmp_pixels_r, kWidth, tmp_pixels_g, kWidth, tmp_pixels_b,
                 kWidth, tmp_pixels_a, kWidth, dst_pixels_opt, kPackedStride,
                 kWidth, kHeight);

  for (int n = 0; n < kPixels * 4; ++n) {
    EXPECT_EQ(dst_pixels_c[n], dst_pixels_opt[n]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(tmp_pixels_r);
  free_aligned_buffer_page_end(tmp_pixels_g);
  free_aligned_buffer_page_end(tmp_pixels_b);
  free_aligned_buffer_page_end(tmp_pixels_a);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3241
// Benchmarks MergeARGBPlane() with the fourth plane passed as NULL (stride
// 0). A 4 byte-per-pixel source is split into three planes and merged back,
// once with CPU features masked by disable_cpu_flags_ and once with
// benchmark_cpu_info_ (the merge being the repeated step); the two merged
// images must match.
TEST_F(LibYUVPlanarTest, MergeXRGBPlane_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kPixels = kWidth * kHeight;
  const int kPackedStride = kWidth * 4;
  align_buffer_page_end(src_pixels, kPixels * 4);
  align_buffer_page_end(tmp_pixels_r, kPixels);
  align_buffer_page_end(tmp_pixels_g, kPixels);
  align_buffer_page_end(tmp_pixels_b, kPixels);
  align_buffer_page_end(dst_pixels_opt, kPixels * 4);
  align_buffer_page_end(dst_pixels_c, kPixels * 4);

  MemRandomize(src_pixels, kPixels * 4);
  MemRandomize(tmp_pixels_r, kPixels);
  MemRandomize(tmp_pixels_g, kPixels);
  MemRandomize(tmp_pixels_b, kPixels);
  MemRandomize(dst_pixels_opt, kPixels * 4);
  MemRandomize(dst_pixels_c, kPixels * 4);

  // Reference round trip: split, then merge into dst_pixels_c.
  MaskCpuFlags(disable_cpu_flags_);
  SplitARGBPlane(src_pixels, kPackedStride, tmp_pixels_r, kWidth, tmp_pixels_g,
                 kWidth, tmp_pixels_b, kWidth, NULL, 0, kWidth, kHeight);
  MergeARGBPlane(tmp_pixels_r, kWidth, tmp_pixels_g, kWidth, tmp_pixels_b,
                 kWidth, NULL, 0, dst_pixels_c, kPackedStride, kWidth, kHeight);

  // Benchmarked round trip: split once, merge repeatedly.
  MaskCpuFlags(benchmark_cpu_info_);
  SplitARGBPlane(src_pixels, kPackedStride, tmp_pixels_r, kWidth, tmp_pixels_g,
                 kWidth, tmp_pixels_b, kWidth, NULL, 0, kWidth, kHeight);
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    MergeARGBPlane(tmp_pixels_r, kWidth, tmp_pixels_g, kWidth, tmp_pixels_b,
                   kWidth, NULL, 0, dst_pixels_opt, kPackedStride, kWidth,
                   kHeight);
  }

  for (int n = 0; n < kPixels * 4; ++n) {
    EXPECT_EQ(dst_pixels_c[n], dst_pixels_opt[n]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(tmp_pixels_r);
  free_aligned_buffer_page_end(tmp_pixels_g);
  free_aligned_buffer_page_end(tmp_pixels_b);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3291
// Benchmarks SplitARGBPlane() with the fourth plane passed as NULL (stride
// 0). A 4 byte-per-pixel source is split into three planes and merged back,
// once with CPU features masked by disable_cpu_flags_ and once with
// benchmark_cpu_info_ (the split being the repeated step); the two merged
// images must match.
TEST_F(LibYUVPlanarTest, SplitXRGBPlane_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kPixels = kWidth * kHeight;
  const int kPackedStride = kWidth * 4;
  align_buffer_page_end(src_pixels, kPixels * 4);
  align_buffer_page_end(tmp_pixels_r, kPixels);
  align_buffer_page_end(tmp_pixels_g, kPixels);
  align_buffer_page_end(tmp_pixels_b, kPixels);
  align_buffer_page_end(dst_pixels_opt, kPixels * 4);
  align_buffer_page_end(dst_pixels_c, kPixels * 4);

  MemRandomize(src_pixels, kPixels * 4);
  MemRandomize(tmp_pixels_r, kPixels);
  MemRandomize(tmp_pixels_g, kPixels);
  MemRandomize(tmp_pixels_b, kPixels);
  MemRandomize(dst_pixels_opt, kPixels * 4);
  MemRandomize(dst_pixels_c, kPixels * 4);

  // Reference round trip: split, then merge into dst_pixels_c.
  MaskCpuFlags(disable_cpu_flags_);
  SplitARGBPlane(src_pixels, kPackedStride, tmp_pixels_r, kWidth, tmp_pixels_g,
                 kWidth, tmp_pixels_b, kWidth, NULL, 0, kWidth, kHeight);
  MergeARGBPlane(tmp_pixels_r, kWidth, tmp_pixels_g, kWidth, tmp_pixels_b,
                 kWidth, NULL, 0, dst_pixels_c, kPackedStride, kWidth, kHeight);

  // Benchmarked round trip: split repeatedly, merge once.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    SplitARGBPlane(src_pixels, kPackedStride, tmp_pixels_r, kWidth,
                   tmp_pixels_g, kWidth, tmp_pixels_b, kWidth, NULL, 0, kWidth,
                   kHeight);
  }
  MergeARGBPlane(tmp_pixels_r, kWidth, tmp_pixels_g, kWidth, tmp_pixels_b,
                 kWidth, NULL, 0, dst_pixels_opt, kPackedStride, kWidth,
                 kHeight);

  for (int n = 0; n < kPixels * 4; ++n) {
    EXPECT_EQ(dst_pixels_c[n], dst_pixels_opt[n]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(tmp_pixels_r);
  free_aligned_buffer_page_end(tmp_pixels_g);
  free_aligned_buffer_page_end(tmp_pixels_b);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3340
// Merge 4 channels
// Defines the test FUNC##Plane_##DEPTH##N. It calls FUNC##Plane on four random
// STYPE planes (r, g, b, a) to fill a DTYPE image once with CPU flags masked by
// disable_cpu_flags_ and benchmark_iterations_ times with benchmark_cpu_info_,
// then expects both images to be equal element by element.
//   W1280 - expression used as the image width.
//   NEG   - sign token placed in front of the height argument.
//   OFF   - byte offset added to every source buffer before it is used.
#define TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, W1280, N, NEG, OFF) \
  TEST_F(LibYUVPlanarTest, FUNC##Plane_##DEPTH##N) { \
    const int kWidth = W1280; \
    const int kPixels = kWidth * benchmark_height_; \
    const size_t kPlaneBytes = kPixels * sizeof(STYPE) + OFF; \
    const size_t kPackedBytes = kPixels * 4 * sizeof(DTYPE); \
    align_buffer_page_end(plane_r_buf, kPlaneBytes); \
    align_buffer_page_end(plane_g_buf, kPlaneBytes); \
    align_buffer_page_end(plane_b_buf, kPlaneBytes); \
    align_buffer_page_end(plane_a_buf, kPlaneBytes); \
    align_buffer_page_end(packed_c_buf, kPackedBytes); \
    align_buffer_page_end(packed_opt_buf, kPackedBytes); \
    MemRandomize(plane_r_buf, kPlaneBytes); \
    MemRandomize(plane_g_buf, kPlaneBytes); \
    MemRandomize(plane_b_buf, kPlaneBytes); \
    MemRandomize(plane_a_buf, kPlaneBytes); \
    memset(packed_c_buf, 0, kPackedBytes); \
    memset(packed_opt_buf, 0, kPackedBytes); \
    STYPE* const plane_r = reinterpret_cast<STYPE*>(plane_r_buf + OFF); \
    STYPE* const plane_g = reinterpret_cast<STYPE*>(plane_g_buf + OFF); \
    STYPE* const plane_b = reinterpret_cast<STYPE*>(plane_b_buf + OFF); \
    STYPE* const plane_a = reinterpret_cast<STYPE*>(plane_a_buf + OFF); \
    DTYPE* const packed_c = reinterpret_cast<DTYPE*>(packed_c_buf); \
    DTYPE* const packed_opt = reinterpret_cast<DTYPE*>(packed_opt_buf); \
    MaskCpuFlags(disable_cpu_flags_); \
    FUNC##Plane(plane_r, kWidth, plane_g, kWidth, plane_b, kWidth, plane_a, \
                kWidth, packed_c, kWidth * 4, kWidth, NEG benchmark_height_, \
                DEPTH); \
    MaskCpuFlags(benchmark_cpu_info_); \
    for (int iter = 0; iter < benchmark_iterations_; ++iter) { \
      FUNC##Plane(plane_r, kWidth, plane_g, kWidth, plane_b, kWidth, plane_a, \
                  kWidth, packed_opt, kWidth * 4, kWidth, \
                  NEG benchmark_height_, DEPTH); \
    } \
    for (int idx = 0; idx < kPixels * 4; ++idx) { \
      EXPECT_EQ(packed_c[idx], packed_opt[idx]); \
    } \
    free_aligned_buffer_page_end(plane_r_buf); \
    free_aligned_buffer_page_end(plane_g_buf); \
    free_aligned_buffer_page_end(plane_b_buf); \
    free_aligned_buffer_page_end(plane_a_buf); \
    free_aligned_buffer_page_end(packed_c_buf); \
    free_aligned_buffer_page_end(packed_opt_buf); \
  }
3384
// Merge 3 channel RGB into 4 channel XRGB with opaque alpha
// Defines the test FUNC##Plane_Opaque_##DEPTH##N. It calls FUNC##Plane on
// three random STYPE planes (r, g, b) with NULL and stride 0 in the alpha
// slot, once with CPU flags masked by disable_cpu_flags_ and
// benchmark_iterations_ times with benchmark_cpu_info_, then expects both
// DTYPE images to be equal element by element.
//   W1280 - expression used as the image width.
//   NEG   - sign token placed in front of the height argument.
//   OFF   - byte offset added to every source buffer before it is used.
#define TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, W1280, N, NEG, OFF) \
  TEST_F(LibYUVPlanarTest, FUNC##Plane_Opaque_##DEPTH##N) { \
    const int kWidth = W1280; \
    const int kPixels = kWidth * benchmark_height_; \
    const size_t kPlaneBytes = kPixels * sizeof(STYPE) + OFF; \
    const size_t kPackedBytes = kPixels * 4 * sizeof(DTYPE); \
    align_buffer_page_end(plane_r_buf, kPlaneBytes); \
    align_buffer_page_end(plane_g_buf, kPlaneBytes); \
    align_buffer_page_end(plane_b_buf, kPlaneBytes); \
    align_buffer_page_end(packed_c_buf, kPackedBytes); \
    align_buffer_page_end(packed_opt_buf, kPackedBytes); \
    MemRandomize(plane_r_buf, kPlaneBytes); \
    MemRandomize(plane_g_buf, kPlaneBytes); \
    MemRandomize(plane_b_buf, kPlaneBytes); \
    memset(packed_c_buf, 0, kPackedBytes); \
    memset(packed_opt_buf, 0, kPackedBytes); \
    STYPE* const plane_r = reinterpret_cast<STYPE*>(plane_r_buf + OFF); \
    STYPE* const plane_g = reinterpret_cast<STYPE*>(plane_g_buf + OFF); \
    STYPE* const plane_b = reinterpret_cast<STYPE*>(plane_b_buf + OFF); \
    DTYPE* const packed_c = reinterpret_cast<DTYPE*>(packed_c_buf); \
    DTYPE* const packed_opt = reinterpret_cast<DTYPE*>(packed_opt_buf); \
    MaskCpuFlags(disable_cpu_flags_); \
    FUNC##Plane(plane_r, kWidth, plane_g, kWidth, plane_b, kWidth, NULL, 0, \
                packed_c, kWidth * 4, kWidth, NEG benchmark_height_, DEPTH); \
    MaskCpuFlags(benchmark_cpu_info_); \
    for (int iter = 0; iter < benchmark_iterations_; ++iter) { \
      FUNC##Plane(plane_r, kWidth, plane_g, kWidth, plane_b, kWidth, NULL, 0, \
                  packed_opt, kWidth * 4, kWidth, NEG benchmark_height_, \
                  DEPTH); \
    } \
    for (int idx = 0; idx < kPixels * 4; ++idx) { \
      EXPECT_EQ(packed_c[idx], packed_opt[idx]); \
    } \
    free_aligned_buffer_page_end(plane_r_buf); \
    free_aligned_buffer_page_end(plane_g_buf); \
    free_aligned_buffer_page_end(plane_b_buf); \
    free_aligned_buffer_page_end(packed_c_buf); \
    free_aligned_buffer_page_end(packed_opt_buf); \
  }
3424
// Expands to eight tests for one FUNC/DEPTH pair: four from TESTQPLANARTOPI
// (alpha plane supplied) and four from TESTQPLANAROTOPI (NULL alpha).
// Each group of four covers:
//   _Any       - width benchmark_width_ + 1
//   _Unaligned - source pointers offset by 2 bytes
//   _Invert    - negated height
//   _Opt       - benchmark_width_, positive height, no offset
#define TESTQPLANARTOP(FUNC, STYPE, DTYPE, DEPTH) \
  TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_ + 1, _Any, +, 0) \
  TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Unaligned, +, \
                  2) \
  TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Invert, -, 0) \
  TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Opt, +, 0) \
  TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_ + 1, _Any, +, \
                   0) \
  TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Unaligned, +, \
                   2) \
  TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Invert, -, 0) \
  TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Opt, +, 0)

// MergeAR64Plane (uint16_t in, uint16_t out) and MergeARGB16To8Plane
// (uint16_t in, uint8_t out), each instantiated with DEPTH 10, 12 and 16.
TESTQPLANARTOP(MergeAR64, uint16_t, uint16_t, 10)
TESTQPLANARTOP(MergeAR64, uint16_t, uint16_t, 12)
TESTQPLANARTOP(MergeAR64, uint16_t, uint16_t, 16)
TESTQPLANARTOP(MergeARGB16To8, uint16_t, uint8_t, 10)
TESTQPLANARTOP(MergeARGB16To8, uint16_t, uint8_t, 12)
TESTQPLANARTOP(MergeARGB16To8, uint16_t, uint8_t, 16)
3444
// Defines the test FUNC##Plane_##DEPTH##N for a merge that takes three source
// planes and no alpha argument. It calls FUNC##Plane on three random STYPE
// planes (r, g, b) once with CPU flags masked by disable_cpu_flags_ and
// benchmark_iterations_ times with benchmark_cpu_info_, then expects both
// DTYPE images to be equal element by element. The two destinations are
// pre-filled with different byte values (1 and 2).
//   W1280 - expression used as the image width.
//   NEG   - sign token placed in front of the height argument.
//   OFF   - byte offset added to every source buffer before it is used.
#define TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, W1280, N, NEG, OFF) \
  TEST_F(LibYUVPlanarTest, FUNC##Plane_##DEPTH##N) { \
    const int kWidth = W1280; \
    const int kPixels = kWidth * benchmark_height_; \
    const size_t kPlaneBytes = kPixels * sizeof(STYPE) + OFF; \
    const size_t kPackedBytes = kPixels * 4 * sizeof(DTYPE); \
    align_buffer_page_end(plane_r_buf, kPlaneBytes); \
    align_buffer_page_end(plane_g_buf, kPlaneBytes); \
    align_buffer_page_end(plane_b_buf, kPlaneBytes); \
    align_buffer_page_end(packed_c_buf, kPackedBytes); \
    align_buffer_page_end(packed_opt_buf, kPackedBytes); \
    MemRandomize(plane_r_buf, kPlaneBytes); \
    MemRandomize(plane_g_buf, kPlaneBytes); \
    MemRandomize(plane_b_buf, kPlaneBytes); \
    STYPE* const plane_r = reinterpret_cast<STYPE*>(plane_r_buf + OFF); \
    STYPE* const plane_g = reinterpret_cast<STYPE*>(plane_g_buf + OFF); \
    STYPE* const plane_b = reinterpret_cast<STYPE*>(plane_b_buf + OFF); \
    DTYPE* const packed_c = reinterpret_cast<DTYPE*>(packed_c_buf); \
    DTYPE* const packed_opt = reinterpret_cast<DTYPE*>(packed_opt_buf); \
    memset(packed_c, 1, kPackedBytes); \
    memset(packed_opt, 2, kPackedBytes); \
    MaskCpuFlags(disable_cpu_flags_); \
    FUNC##Plane(plane_r, kWidth, plane_g, kWidth, plane_b, kWidth, packed_c, \
                kWidth * 4, kWidth, NEG benchmark_height_, DEPTH); \
    MaskCpuFlags(benchmark_cpu_info_); \
    for (int iter = 0; iter < benchmark_iterations_; ++iter) { \
      FUNC##Plane(plane_r, kWidth, plane_g, kWidth, plane_b, kWidth, \
                  packed_opt, kWidth * 4, kWidth, NEG benchmark_height_, \
                  DEPTH); \
    } \
    for (int idx = 0; idx < kPixels * 4; ++idx) { \
      EXPECT_EQ(packed_c[idx], packed_opt[idx]); \
    } \
    free_aligned_buffer_page_end(plane_r_buf); \
    free_aligned_buffer_page_end(plane_g_buf); \
    free_aligned_buffer_page_end(plane_b_buf); \
    free_aligned_buffer_page_end(packed_c_buf); \
    free_aligned_buffer_page_end(packed_opt_buf); \
  }
3483
// Expands to four TESTTPLANARTOPI tests for one FUNC/DEPTH pair:
//   _Any       - width benchmark_width_ + 1
//   _Unaligned - source pointers offset by 2 bytes
//   _Invert    - negated height
//   _Opt       - benchmark_width_, positive height, no offset
#define TESTTPLANARTOP(FUNC, STYPE, DTYPE, DEPTH) \
  TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_ + 1, _Any, +, 0) \
  TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Unaligned, +, \
                  2) \
  TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Invert, -, 0) \
  TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Opt, +, 0)

// MergeXR30Plane (uint16_t in, uint8_t out) instantiated with DEPTH 10, 12
// and 16.
TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 10)
TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 12)
TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 16)
3494
// TODO(fbarchard): improve test for platforms and cpu detect
#ifdef HAS_MERGEUVROW_16_AVX2
// Runs MergeUVRow_16_C once on two random 16-bit planes, then runs the merge
// benchmark_iterations_ times using MergeUVRow_16_AVX2 when
// TestCpuFlag(kCpuHasAVX2) is set (the C row otherwise). The two outputs must
// be byte-identical.
TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) {
  // Round count up to multiple of 16
  const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
  const int kPlaneBytes = kPixels * 2;
  const int kUVBytes = kPixels * 2 * 2;

  align_buffer_page_end(plane_u, kPlaneBytes);
  align_buffer_page_end(plane_v, kPlaneBytes);
  align_buffer_page_end(uv_opt, kUVBytes);
  align_buffer_page_end(uv_c, kUVBytes);

  MemRandomize(plane_u, kPlaneBytes);
  MemRandomize(plane_v, kPlaneBytes);
  // The two outputs start with different fill bytes.
  memset(uv_opt, 0, kUVBytes);
  memset(uv_c, 1, kUVBytes);

  const uint16_t* const src_u = reinterpret_cast<const uint16_t*>(plane_u);
  const uint16_t* const src_v = reinterpret_cast<const uint16_t*>(plane_v);
  uint16_t* const dst_c = reinterpret_cast<uint16_t*>(uv_c);
  uint16_t* const dst_opt = reinterpret_cast<uint16_t*>(uv_opt);

  MergeUVRow_16_C(src_u, src_v, dst_c, 16, kPixels);

  if (TestCpuFlag(kCpuHasAVX2)) {
    for (int iter = 0; iter < benchmark_iterations_; ++iter) {
      MergeUVRow_16_AVX2(src_u, src_v, dst_opt, 16, kPixels);
    }
  } else {
    for (int iter = 0; iter < benchmark_iterations_; ++iter) {
      MergeUVRow_16_C(src_u, src_v, dst_opt, 16, kPixels);
    }
  }

  for (int idx = 0; idx < kUVBytes; ++idx) {
    EXPECT_EQ(uv_opt[idx], uv_c[idx]);
  }

  free_aligned_buffer_page_end(plane_u);
  free_aligned_buffer_page_end(plane_v);
  free_aligned_buffer_page_end(uv_opt);
  free_aligned_buffer_page_end(uv_c);
}
#endif  // HAS_MERGEUVROW_16_AVX2
3540
// TODO(fbarchard): Improve test for more platforms.
#ifdef HAS_MULTIPLYROW_16_AVX2
// Runs MultiplyRow_16_C once on a random 16-bit plane with the value 64 as its
// third argument, then runs the same operation benchmark_iterations_ times
// using MultiplyRow_16_AVX2 when TestCpuFlag(kCpuHasAVX2) is set (the C row
// otherwise). The two outputs must be byte-identical.
TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) {
  // Round count up to multiple of 32
  const int kPixels = (benchmark_width_ * benchmark_height_ + 31) & ~31;
  const int kPlaneBytes = kPixels * 2;

  align_buffer_page_end(plane_in, kPlaneBytes);
  align_buffer_page_end(plane_opt, kPlaneBytes);
  align_buffer_page_end(plane_c, kPlaneBytes);

  MemRandomize(plane_in, kPlaneBytes);
  // The two outputs start with different fill bytes.
  memset(plane_opt, 0, kPlaneBytes);
  memset(plane_c, 1, kPlaneBytes);

  const uint16_t* const src_y = reinterpret_cast<const uint16_t*>(plane_in);
  uint16_t* const dst_c = reinterpret_cast<uint16_t*>(plane_c);
  uint16_t* const dst_opt = reinterpret_cast<uint16_t*>(plane_opt);

  MultiplyRow_16_C(src_y, dst_c, 64, kPixels);

  if (TestCpuFlag(kCpuHasAVX2)) {
    for (int iter = 0; iter < benchmark_iterations_; ++iter) {
      MultiplyRow_16_AVX2(src_y, dst_opt, 64, kPixels);
    }
  } else {
    for (int iter = 0; iter < benchmark_iterations_; ++iter) {
      MultiplyRow_16_C(src_y, dst_opt, 64, kPixels);
    }
  }

  for (int idx = 0; idx < kPlaneBytes; ++idx) {
    EXPECT_EQ(plane_opt[idx], plane_c[idx]);
  }

  free_aligned_buffer_page_end(plane_in);
  free_aligned_buffer_page_end(plane_opt);
  free_aligned_buffer_page_end(plane_c);
}
#endif  // HAS_MULTIPLYROW_16_AVX2
3580
// Calls Convert16To8Plane with scale argument 16384 on a random 16-bit plane,
// once with CPU flags masked by disable_cpu_flags_ and benchmark_iterations_
// times with benchmark_cpu_info_, and expects identical 8-bit output.
TEST_F(LibYUVPlanarTest, Convert16To8Plane) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kPixels = kWidth * kHeight;
  align_buffer_page_end(plane16, kPixels * 2);
  align_buffer_page_end(plane8_opt, kPixels);
  align_buffer_page_end(plane8_c, kPixels);

  MemRandomize(plane16, kPixels * 2);
  // The two outputs start with different fill bytes.
  memset(plane8_opt, 0, kPixels);
  memset(plane8_c, 1, kPixels);

  const uint16_t* const src_y = reinterpret_cast<const uint16_t*>(plane16);

  MaskCpuFlags(disable_cpu_flags_);
  Convert16To8Plane(src_y, kWidth, plane8_c, kWidth, 16384, kWidth, kHeight);
  MaskCpuFlags(benchmark_cpu_info_);

  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    Convert16To8Plane(src_y, kWidth, plane8_opt, kWidth, 16384, kWidth,
                      kHeight);
  }

  for (int idx = 0; idx < kPixels; ++idx) {
    EXPECT_EQ(plane8_opt[idx], plane8_c[idx]);
  }

  free_aligned_buffer_page_end(plane16);
  free_aligned_buffer_page_end(plane8_opt);
  free_aligned_buffer_page_end(plane8_c);
}
3611
// Calls YUY2ToY on a random 2-bytes-per-pixel source, once with CPU flags
// masked by disable_cpu_flags_ and benchmark_iterations_ times with
// benchmark_cpu_info_, and expects identical Y planes.
TEST_F(LibYUVPlanarTest, YUY2ToY) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kSrcStride = kWidth * 2;
  const int kPixels = kWidth * kHeight;
  align_buffer_page_end(packed_src, kPixels * 2);
  align_buffer_page_end(y_opt, kPixels);
  align_buffer_page_end(y_c, kPixels);

  MemRandomize(packed_src, kPixels * 2);
  // The two outputs start with different fill bytes.
  memset(y_opt, 0, kPixels);
  memset(y_c, 1, kPixels);

  MaskCpuFlags(disable_cpu_flags_);
  YUY2ToY(packed_src, kSrcStride, y_c, kWidth, kWidth, kHeight);
  MaskCpuFlags(benchmark_cpu_info_);

  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    YUY2ToY(packed_src, kSrcStride, y_opt, kWidth, kWidth, kHeight);
  }

  for (int idx = 0; idx < kPixels; ++idx) {
    EXPECT_EQ(y_opt[idx], y_c[idx]);
  }

  free_aligned_buffer_page_end(packed_src);
  free_aligned_buffer_page_end(y_opt);
  free_aligned_buffer_page_end(y_c);
}
3640
// Calls UYVYToY on a random 2-bytes-per-pixel source, once with CPU flags
// masked by disable_cpu_flags_ and benchmark_iterations_ times with
// benchmark_cpu_info_, and expects identical Y planes.
TEST_F(LibYUVPlanarTest, UYVYToY) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kSrcStride = kWidth * 2;
  const int kPixels = kWidth * kHeight;
  align_buffer_page_end(packed_src, kPixels * 2);
  align_buffer_page_end(y_opt, kPixels);
  align_buffer_page_end(y_c, kPixels);

  MemRandomize(packed_src, kPixels * 2);
  // The two outputs start with different fill bytes.
  memset(y_opt, 0, kPixels);
  memset(y_c, 1, kPixels);

  MaskCpuFlags(disable_cpu_flags_);
  UYVYToY(packed_src, kSrcStride, y_c, kWidth, kWidth, kHeight);
  MaskCpuFlags(benchmark_cpu_info_);

  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    UYVYToY(packed_src, kSrcStride, y_opt, kWidth, kWidth, kHeight);
  }

  for (int idx = 0; idx < kPixels; ++idx) {
    EXPECT_EQ(y_opt[idx], y_c[idx]);
  }

  free_aligned_buffer_page_end(packed_src);
  free_aligned_buffer_page_end(y_opt);
  free_aligned_buffer_page_end(y_c);
}
3669
3670 #ifdef ENABLE_ROW_TESTS
3671 // TODO(fbarchard): Improve test for more platforms.
#ifdef HAS_CONVERT16TO8ROW_AVX2
// Runs Convert16To8Row_C once on random samples masked to 10 bits, then runs
// the conversion benchmark_iterations_ times with the AVX2 row, the SSSE3 row
// or the C row, chosen by TestCpuFlag in that order of preference. The two
// outputs must be byte-identical.
TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) {
  // AVX2 does multiple of 32, so round count up
  const int kPixels = (benchmark_width_ * benchmark_height_ + 31) & ~31;
  align_buffer_page_end(plane16, kPixels * 2);
  align_buffer_page_end(plane8_opt, kPixels);
  align_buffer_page_end(plane8_c, kPixels);

  MemRandomize(plane16, kPixels * 2);
  // clamp source range to 10 bits.
  uint16_t* const samples = reinterpret_cast<uint16_t*>(plane16);
  for (int idx = 0; idx < kPixels; ++idx) {
    samples[idx] &= 1023;
  }

  // The two outputs start with different fill bytes.
  memset(plane8_opt, 0, kPixels);
  memset(plane8_c, 1, kPixels);

  const uint16_t* const src_y = reinterpret_cast<const uint16_t*>(plane16);

  Convert16To8Row_C(src_y, plane8_c, 16384, kPixels);

  const int has_avx2 = TestCpuFlag(kCpuHasAVX2);
  const int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
  if (has_avx2) {
    for (int iter = 0; iter < benchmark_iterations_; ++iter) {
      Convert16To8Row_AVX2(src_y, plane8_opt, 16384, kPixels);
    }
  } else if (has_ssse3) {
    for (int iter = 0; iter < benchmark_iterations_; ++iter) {
      Convert16To8Row_SSSE3(src_y, plane8_opt, 16384, kPixels);
    }
  } else {
    for (int iter = 0; iter < benchmark_iterations_; ++iter) {
      Convert16To8Row_C(src_y, plane8_opt, 16384, kPixels);
    }
  }

  for (int idx = 0; idx < kPixels; ++idx) {
    EXPECT_EQ(plane8_opt[idx], plane8_c[idx]);
  }

  free_aligned_buffer_page_end(plane16);
  free_aligned_buffer_page_end(plane8_opt);
  free_aligned_buffer_page_end(plane8_c);
}
#endif  // HAS_CONVERT16TO8ROW_AVX2
3716
#ifdef HAS_UYVYTOYROW_NEON
// Runs UYVYToYRow_C once and UYVYToYRow_NEON benchmark_iterations_ times on
// the same random source and expects identical output. The NEON row is called
// without a runtime CPU flag check.
TEST_F(LibYUVPlanarTest, UYVYToYRow_Opt) {
  // NEON does multiple of 16, so round count up
  const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
  const int kSrcBytes = kPixels * 2;
  align_buffer_page_end(packed_src, kSrcBytes);
  align_buffer_page_end(y_opt, kPixels);
  align_buffer_page_end(y_c, kPixels);

  MemRandomize(packed_src, kSrcBytes);
  // The two outputs start with different fill bytes.
  memset(y_opt, 0, kPixels);
  memset(y_c, 1, kPixels);

  UYVYToYRow_C(packed_src, y_c, kPixels);

  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    UYVYToYRow_NEON(packed_src, y_opt, kPixels);
  }

  for (int idx = 0; idx < kPixels; ++idx) {
    EXPECT_EQ(y_opt[idx], y_c[idx]);
  }

  free_aligned_buffer_page_end(packed_src);
  free_aligned_buffer_page_end(y_opt);
  free_aligned_buffer_page_end(y_c);
}
#endif  // HAS_UYVYTOYROW_NEON
3744
3745 #endif // ENABLE_ROW_TESTS
3746
// Calls Convert8To16Plane with scale argument 1024 on a random 8-bit plane,
// once with CPU flags masked by disable_cpu_flags_ and benchmark_iterations_
// times with benchmark_cpu_info_, and expects byte-identical 16-bit output.
TEST_F(LibYUVPlanarTest, Convert8To16Plane) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kPixels = kWidth * kHeight;
  const int kDstBytes = kPixels * 2;
  align_buffer_page_end(plane8, kPixels);
  align_buffer_page_end(plane16_opt, kDstBytes);
  align_buffer_page_end(plane16_c, kDstBytes);

  MemRandomize(plane8, kPixels);
  // The two outputs start with different fill bytes.
  memset(plane16_opt, 0, kDstBytes);
  memset(plane16_c, 1, kDstBytes);

  uint16_t* const dst_c = reinterpret_cast<uint16_t*>(plane16_c);
  uint16_t* const dst_opt = reinterpret_cast<uint16_t*>(plane16_opt);

  MaskCpuFlags(disable_cpu_flags_);
  Convert8To16Plane(plane8, kWidth, dst_c, kWidth, 1024, kWidth, kHeight);
  MaskCpuFlags(benchmark_cpu_info_);

  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    Convert8To16Plane(plane8, kWidth, dst_opt, kWidth, 1024, kWidth, kHeight);
  }

  // Compared as raw bytes, two per output sample.
  for (int idx = 0; idx < kDstBytes; ++idx) {
    EXPECT_EQ(plane16_opt[idx], plane16_c[idx]);
  }

  free_aligned_buffer_page_end(plane8);
  free_aligned_buffer_page_end(plane16_opt);
  free_aligned_buffer_page_end(plane16_c);
}
3779
3780 #ifdef ENABLE_ROW_TESTS
3781 // TODO(fbarchard): Improve test for more platforms.
#ifdef HAS_CONVERT8TO16ROW_AVX2
// Runs Convert8To16Row_C once on a random 8-bit row, then runs the conversion
// benchmark_iterations_ times with the AVX2 row, the SSE2 row or the C row,
// chosen by TestCpuFlag in that order of preference. The two outputs must be
// byte-identical.
TEST_F(LibYUVPlanarTest, Convert8To16Row_Opt) {
  const int kPixels = (benchmark_width_ * benchmark_height_ + 31) & ~31;
  const int kDstBytes = kPixels * 2;
  align_buffer_page_end(plane8, kPixels);
  align_buffer_page_end(plane16_opt, kDstBytes);
  align_buffer_page_end(plane16_c, kDstBytes);

  MemRandomize(plane8, kPixels);
  // The two outputs start with different fill bytes.
  memset(plane16_opt, 0, kDstBytes);
  memset(plane16_c, 1, kDstBytes);

  uint16_t* const dst_c = reinterpret_cast<uint16_t*>(plane16_c);
  uint16_t* const dst_opt = reinterpret_cast<uint16_t*>(plane16_opt);

  Convert8To16Row_C(plane8, dst_c, 1024, kPixels);

  const int has_avx2 = TestCpuFlag(kCpuHasAVX2);
  const int has_sse2 = TestCpuFlag(kCpuHasSSE2);
  if (has_avx2) {
    for (int iter = 0; iter < benchmark_iterations_; ++iter) {
      Convert8To16Row_AVX2(plane8, dst_opt, 1024, kPixels);
    }
  } else if (has_sse2) {
    for (int iter = 0; iter < benchmark_iterations_; ++iter) {
      Convert8To16Row_SSE2(plane8, dst_opt, 1024, kPixels);
    }
  } else {
    for (int iter = 0; iter < benchmark_iterations_; ++iter) {
      Convert8To16Row_C(plane8, dst_opt, 1024, kPixels);
    }
  }

  // Compared as raw bytes, two per output sample.
  for (int idx = 0; idx < kDstBytes; ++idx) {
    EXPECT_EQ(plane16_opt[idx], plane16_c[idx]);
  }

  free_aligned_buffer_page_end(plane8);
  free_aligned_buffer_page_end(plane16_opt);
  free_aligned_buffer_page_end(plane16_c);
}
#endif  // HAS_CONVERT8TO16ROW_AVX2
3823
TestScaleMaxSamples(int benchmark_width,int benchmark_height,int benchmark_iterations,float scale,bool opt)3824 float TestScaleMaxSamples(int benchmark_width,
3825 int benchmark_height,
3826 int benchmark_iterations,
3827 float scale,
3828 bool opt) {
3829 int i, j;
3830 float max_c, max_opt = 0.f;
3831 // NEON does multiple of 8, so round count up
3832 const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
3833 align_buffer_page_end(orig_y, kPixels * 4 * 3 + 48);
3834 uint8_t* dst_c = orig_y + kPixels * 4 + 16;
3835 uint8_t* dst_opt = orig_y + kPixels * 4 * 2 + 32;
3836
3837 // Randomize works but may contain some denormals affecting performance.
3838 // MemRandomize(orig_y, kPixels * 4);
3839 // large values are problematic. audio is really -1 to 1.
3840 for (i = 0; i < kPixels; ++i) {
3841 (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
3842 }
3843 memset(dst_c, 0, kPixels * 4);
3844 memset(dst_opt, 1, kPixels * 4);
3845
3846 max_c = ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y),
3847 reinterpret_cast<float*>(dst_c), scale, kPixels);
3848
3849 for (j = 0; j < benchmark_iterations; j++) {
3850 if (opt) {
3851 #ifdef HAS_SCALESUMSAMPLES_NEON
3852 max_opt = ScaleMaxSamples_NEON(reinterpret_cast<float*>(orig_y),
3853 reinterpret_cast<float*>(dst_opt), scale,
3854 kPixels);
3855 #else
3856 max_opt =
3857 ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y),
3858 reinterpret_cast<float*>(dst_opt), scale, kPixels);
3859 #endif
3860 } else {
3861 max_opt =
3862 ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y),
3863 reinterpret_cast<float*>(dst_opt), scale, kPixels);
3864 }
3865 }
3866
3867 float max_diff = FAbs(max_opt - max_c);
3868 for (i = 0; i < kPixels; ++i) {
3869 float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
3870 (reinterpret_cast<float*>(dst_opt)[i]));
3871 if (abs_diff > max_diff) {
3872 max_diff = abs_diff;
3873 }
3874 }
3875
3876 free_aligned_buffer_page_end(orig_y);
3877 return max_diff;
3878 }
3879
// With opt == false both runs inside TestScaleMaxSamples use the C row, so the
// reported difference must be zero.
TEST_F(LibYUVPlanarTest, TestScaleMaxSamples_C) {
  const float max_error =
      TestScaleMaxSamples(benchmark_width_, benchmark_height_,
                          benchmark_iterations_, 1.2f, /*opt=*/false);
  EXPECT_EQ(0, max_error);
}
3885
// With opt == true TestScaleMaxSamples may use the NEON row for the second
// run; the reported difference must still be zero.
TEST_F(LibYUVPlanarTest, TestScaleMaxSamples_Opt) {
  const float max_error =
      TestScaleMaxSamples(benchmark_width_, benchmark_height_,
                          benchmark_iterations_, 1.2f, /*opt=*/true);
  EXPECT_EQ(0, max_error);
}
3891
TestScaleSumSamples(int benchmark_width,int benchmark_height,int benchmark_iterations,float scale,bool opt)3892 float TestScaleSumSamples(int benchmark_width,
3893 int benchmark_height,
3894 int benchmark_iterations,
3895 float scale,
3896 bool opt) {
3897 int i, j;
3898 float sum_c, sum_opt = 0.f;
3899 // NEON does multiple of 8, so round count up
3900 const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
3901 align_buffer_page_end(orig_y, kPixels * 4 * 3);
3902 uint8_t* dst_c = orig_y + kPixels * 4;
3903 uint8_t* dst_opt = orig_y + kPixels * 4 * 2;
3904
3905 // Randomize works but may contain some denormals affecting performance.
3906 // MemRandomize(orig_y, kPixels * 4);
3907 // large values are problematic. audio is really -1 to 1.
3908 for (i = 0; i < kPixels; ++i) {
3909 (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
3910 }
3911 memset(dst_c, 0, kPixels * 4);
3912 memset(dst_opt, 1, kPixels * 4);
3913
3914 sum_c = ScaleSumSamples_C(reinterpret_cast<float*>(orig_y),
3915 reinterpret_cast<float*>(dst_c), scale, kPixels);
3916
3917 for (j = 0; j < benchmark_iterations; j++) {
3918 if (opt) {
3919 #ifdef HAS_SCALESUMSAMPLES_NEON
3920 sum_opt = ScaleSumSamples_NEON(reinterpret_cast<float*>(orig_y),
3921 reinterpret_cast<float*>(dst_opt), scale,
3922 kPixels);
3923 #else
3924 sum_opt =
3925 ScaleSumSamples_C(reinterpret_cast<float*>(orig_y),
3926 reinterpret_cast<float*>(dst_opt), scale, kPixels);
3927 #endif
3928 } else {
3929 sum_opt =
3930 ScaleSumSamples_C(reinterpret_cast<float*>(orig_y),
3931 reinterpret_cast<float*>(dst_opt), scale, kPixels);
3932 }
3933 }
3934
3935 float mse_opt = sum_opt / kPixels * 4;
3936 float mse_c = sum_c / kPixels * 4;
3937 float mse_error = FAbs(mse_opt - mse_c) / mse_c;
3938
3939 // If the sum of a float is more than 4 million, small adds are round down on
3940 // float and produce different results with vectorized sum vs scalar sum.
3941 // Ignore the difference if the sum is large.
3942 float max_diff = 0.f;
3943 if (mse_error > 0.0001 && sum_c < 4000000) { // allow .01% difference of mse
3944 max_diff = mse_error;
3945 }
3946
3947 for (i = 0; i < kPixels; ++i) {
3948 float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
3949 (reinterpret_cast<float*>(dst_opt)[i]));
3950 if (abs_diff > max_diff) {
3951 max_diff = abs_diff;
3952 }
3953 }
3954
3955 free_aligned_buffer_page_end(orig_y);
3956 return max_diff;
3957 }
3958
// With opt == false both runs inside TestScaleSumSamples use the C row, so the
// reported difference must be zero.
TEST_F(LibYUVPlanarTest, TestScaleSumSamples_C) {
  const float max_error =
      TestScaleSumSamples(benchmark_width_, benchmark_height_,
                          benchmark_iterations_, 1.2f, /*opt=*/false);
  EXPECT_EQ(0, max_error);
}
3964
// With opt == true TestScaleSumSamples may use the NEON row for the second
// run; the reported difference must still be zero.
TEST_F(LibYUVPlanarTest, TestScaleSumSamples_Opt) {
  const float max_error =
      TestScaleSumSamples(benchmark_width_, benchmark_height_,
                          benchmark_iterations_, 1.2f, /*opt=*/true);
  EXPECT_EQ(0, max_error);
}
3970
TestScaleSamples(int benchmark_width,int benchmark_height,int benchmark_iterations,float scale,bool opt)3971 float TestScaleSamples(int benchmark_width,
3972 int benchmark_height,
3973 int benchmark_iterations,
3974 float scale,
3975 bool opt) {
3976 int i, j;
3977 // NEON does multiple of 8, so round count up
3978 const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
3979 align_buffer_page_end(orig_y, kPixels * 4 * 3);
3980 uint8_t* dst_c = orig_y + kPixels * 4;
3981 uint8_t* dst_opt = orig_y + kPixels * 4 * 2;
3982
3983 // Randomize works but may contain some denormals affecting performance.
3984 // MemRandomize(orig_y, kPixels * 4);
3985 // large values are problematic. audio is really -1 to 1.
3986 for (i = 0; i < kPixels; ++i) {
3987 (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
3988 }
3989 memset(dst_c, 0, kPixels * 4);
3990 memset(dst_opt, 1, kPixels * 4);
3991
3992 ScaleSamples_C(reinterpret_cast<float*>(orig_y),
3993 reinterpret_cast<float*>(dst_c), scale, kPixels);
3994
3995 for (j = 0; j < benchmark_iterations; j++) {
3996 if (opt) {
3997 #ifdef HAS_SCALESUMSAMPLES_NEON
3998 ScaleSamples_NEON(reinterpret_cast<float*>(orig_y),
3999 reinterpret_cast<float*>(dst_opt), scale, kPixels);
4000 #else
4001 ScaleSamples_C(reinterpret_cast<float*>(orig_y),
4002 reinterpret_cast<float*>(dst_opt), scale, kPixels);
4003 #endif
4004 } else {
4005 ScaleSamples_C(reinterpret_cast<float*>(orig_y),
4006 reinterpret_cast<float*>(dst_opt), scale, kPixels);
4007 }
4008 }
4009
4010 float max_diff = 0.f;
4011 for (i = 0; i < kPixels; ++i) {
4012 float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
4013 (reinterpret_cast<float*>(dst_opt)[i]));
4014 if (abs_diff > max_diff) {
4015 max_diff = abs_diff;
4016 }
4017 }
4018
4019 free_aligned_buffer_page_end(orig_y);
4020 return max_diff;
4021 }
4022
// With opt == false both runs inside TestScaleSamples use the C row, so the
// reported difference must be zero.
TEST_F(LibYUVPlanarTest, TestScaleSamples_C) {
  const float max_error =
      TestScaleSamples(benchmark_width_, benchmark_height_,
                       benchmark_iterations_, 1.2f, /*opt=*/false);
  EXPECT_EQ(0, max_error);
}
4028
// With opt == true TestScaleSamples may use the NEON row for the second run;
// the reported difference must still be zero.
TEST_F(LibYUVPlanarTest, TestScaleSamples_Opt) {
  const float max_error =
      TestScaleSamples(benchmark_width_, benchmark_height_,
                       benchmark_iterations_, 1.2f, /*opt=*/true);
  EXPECT_EQ(0, max_error);
}
4034
TestCopySamples(int benchmark_width,int benchmark_height,int benchmark_iterations,bool opt)4035 float TestCopySamples(int benchmark_width,
4036 int benchmark_height,
4037 int benchmark_iterations,
4038 bool opt) {
4039 int i, j;
4040 // NEON does multiple of 16 floats, so round count up
4041 const int kPixels = (benchmark_width * benchmark_height + 15) & ~15;
4042 align_buffer_page_end(orig_y, kPixels * 4 * 3);
4043 uint8_t* dst_c = orig_y + kPixels * 4;
4044 uint8_t* dst_opt = orig_y + kPixels * 4 * 2;
4045
4046 // Randomize works but may contain some denormals affecting performance.
4047 // MemRandomize(orig_y, kPixels * 4);
4048 // large values are problematic. audio is really -1 to 1.
4049 for (i = 0; i < kPixels; ++i) {
4050 (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
4051 }
4052 memset(dst_c, 0, kPixels * 4);
4053 memset(dst_opt, 1, kPixels * 4);
4054
4055 memcpy(reinterpret_cast<void*>(dst_c), reinterpret_cast<void*>(orig_y),
4056 kPixels * 4);
4057
4058 for (j = 0; j < benchmark_iterations; j++) {
4059 if (opt) {
4060 #ifdef HAS_COPYROW_NEON
4061 CopyRow_NEON(orig_y, dst_opt, kPixels * 4);
4062 #else
4063 CopyRow_C(orig_y, dst_opt, kPixels * 4);
4064 #endif
4065 } else {
4066 CopyRow_C(orig_y, dst_opt, kPixels * 4);
4067 }
4068 }
4069
4070 float max_diff = 0.f;
4071 for (i = 0; i < kPixels; ++i) {
4072 float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
4073 (reinterpret_cast<float*>(dst_opt)[i]));
4074 if (abs_diff > max_diff) {
4075 max_diff = abs_diff;
4076 }
4077 }
4078
4079 free_aligned_buffer_page_end(orig_y);
4080 return max_diff;
4081 }
4082
// With opt == false TestCopySamples copies with CopyRow_C; the result must
// equal the memcpy copy exactly.
TEST_F(LibYUVPlanarTest, TestCopySamples_C) {
  const float max_error = TestCopySamples(benchmark_width_, benchmark_height_,
                                          benchmark_iterations_, /*opt=*/false);
  EXPECT_EQ(0, max_error);
}
4088
// With opt == true TestCopySamples may copy with CopyRow_NEON; the result must
// equal the memcpy copy exactly.
TEST_F(LibYUVPlanarTest, TestCopySamples_Opt) {
  const float max_error = TestCopySamples(benchmark_width_, benchmark_height_,
                                          benchmark_iterations_, /*opt=*/true);
  EXPECT_EQ(0, max_error);
}
4094
4095 extern "C" void GaussRow_NEON(const uint32_t* src, uint16_t* dst, int width);
4096 extern "C" void GaussRow_C(const uint32_t* src, uint16_t* dst, int width);
4097
TEST_F(LibYUVPlanarTest,TestGaussRow_Opt)4098 TEST_F(LibYUVPlanarTest, TestGaussRow_Opt) {
4099 SIMD_ALIGNED(uint32_t orig_pixels[1280 + 8]);
4100 SIMD_ALIGNED(uint16_t dst_pixels_c[1280]);
4101 SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]);
4102
4103 memset(orig_pixels, 0, sizeof(orig_pixels));
4104 memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
4105 memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));
4106
4107 for (int i = 0; i < 1280 + 8; ++i) {
4108 orig_pixels[i] = i * 256;
4109 }
4110 GaussRow_C(&orig_pixels[0], &dst_pixels_c[0], 1280);
4111 for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
4112 #if !defined(LIBYUV_DISABLE_NEON) && \
4113 (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
4114 int has_neon = TestCpuFlag(kCpuHasNEON);
4115 if (has_neon) {
4116 GaussRow_NEON(&orig_pixels[0], &dst_pixels_opt[0], 1280);
4117 } else {
4118 GaussRow_C(&orig_pixels[0], &dst_pixels_opt[0], 1280);
4119 }
4120 #else
4121 GaussRow_C(&orig_pixels[0], &dst_pixels_opt[0], 1280);
4122 #endif
4123 }
4124
4125 for (int i = 0; i < 1280; ++i) {
4126 EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
4127 }
4128
4129 EXPECT_EQ(dst_pixels_c[0],
4130 static_cast<uint16_t>(0 * 1 + 1 * 4 + 2 * 6 + 3 * 4 + 4 * 1));
4131 EXPECT_EQ(dst_pixels_c[639], static_cast<uint16_t>(10256));
4132 }
4133
4134 extern "C" void GaussCol_NEON(const uint16_t* src0,
4135 const uint16_t* src1,
4136 const uint16_t* src2,
4137 const uint16_t* src3,
4138 const uint16_t* src4,
4139 uint32_t* dst,
4140 int width);
4141
4142 extern "C" void GaussCol_C(const uint16_t* src0,
4143 const uint16_t* src1,
4144 const uint16_t* src2,
4145 const uint16_t* src3,
4146 const uint16_t* src4,
4147 uint32_t* dst,
4148 int width);
4149
TEST_F(LibYUVPlanarTest,TestGaussCol_Opt)4150 TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) {
4151 SIMD_ALIGNED(uint16_t orig_pixels[1280 * 5]);
4152 SIMD_ALIGNED(uint32_t dst_pixels_c[1280]);
4153 SIMD_ALIGNED(uint32_t dst_pixels_opt[1280]);
4154
4155 memset(orig_pixels, 0, sizeof(orig_pixels));
4156 memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
4157 memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));
4158
4159 for (int i = 0; i < 1280 * 5; ++i) {
4160 orig_pixels[i] = static_cast<float>(i);
4161 }
4162 GaussCol_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2],
4163 &orig_pixels[1280 * 3], &orig_pixels[1280 * 4], &dst_pixels_c[0],
4164 1280);
4165 for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
4166 #if !defined(LIBYUV_DISABLE_NEON) && \
4167 (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
4168 int has_neon = TestCpuFlag(kCpuHasNEON);
4169 if (has_neon) {
4170 GaussCol_NEON(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2],
4171 &orig_pixels[1280 * 3], &orig_pixels[1280 * 4],
4172 &dst_pixels_opt[0], 1280);
4173 } else {
4174 GaussCol_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2],
4175 &orig_pixels[1280 * 3], &orig_pixels[1280 * 4],
4176 &dst_pixels_opt[0], 1280);
4177 }
4178 #else
4179 GaussCol_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2],
4180 &orig_pixels[1280 * 3], &orig_pixels[1280 * 4],
4181 &dst_pixels_opt[0], 1280);
4182 #endif
4183 }
4184
4185 for (int i = 0; i < 1280; ++i) {
4186 EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
4187 }
4188 }
4189
// Runs GaussRow_F32_C once as the reference, then repeatedly runs the NEON
// row (aarch64 builds with NEON reported by the CPU) or the C row again, and
// requires bit-for-bit equal float output.
TEST_F(LibYUVPlanarTest, TestGaussRow_F32_Opt) {
  const int kWidth = 1280;
  const int kSrcWidth = kWidth + 4;  // source holds 4 entries past the output
  SIMD_ALIGNED(float orig_pixels[kSrcWidth]);
  SIMD_ALIGNED(float dst_pixels_c[kWidth]);
  SIMD_ALIGNED(float dst_pixels_opt[kWidth]);

  // Distinct fill bytes so an unwritten output cannot match by accident.
  memset(orig_pixels, 0, sizeof(orig_pixels));
  memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
  memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));

  // Linear ramp 0, 1, 2, ...
  for (int n = 0; n < kSrcWidth; ++n) {
    orig_pixels[n] = static_cast<float>(n);
  }
  GaussRow_F32_C(orig_pixels, dst_pixels_c, kWidth);

  for (int iter = 0; iter < benchmark_pixels_div1280_; ++iter) {
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
    const int has_neon = TestCpuFlag(kCpuHasNEON);
    if (!has_neon) {
      GaussRow_F32_C(orig_pixels, dst_pixels_opt, kWidth);
    } else {
      GaussRow_F32_NEON(orig_pixels, dst_pixels_opt, kWidth);
    }
#else
    GaussRow_F32_C(orig_pixels, dst_pixels_opt, kWidth);
#endif
  }

  for (int i = 0; i < kWidth; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }
}
4220
// Runs GaussCol_F32_C once as the reference over five consecutive rows, then
// repeatedly runs the NEON column filter (aarch64 builds with NEON reported by
// the CPU) or the C filter again, and requires bit-for-bit equal float output.
TEST_F(LibYUVPlanarTest, TestGaussCol_F32_Opt) {
  const int kWidth = 1280;
  const int kRows = 5;
  const int kSrcBytes = kWidth * kRows * 4;  // 4 bytes per float
  SIMD_ALIGNED(float dst_pixels_c[kWidth]);
  SIMD_ALIGNED(float dst_pixels_opt[kWidth]);
  align_buffer_page_end(orig_pixels_buf, kSrcBytes);  // 5 rows
  float* orig_pixels = reinterpret_cast<float*>(orig_pixels_buf);

  // Distinct fill bytes so an unwritten output cannot match by accident.
  memset(orig_pixels, 0, kSrcBytes);
  memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
  memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));

  // Linear ramp across all five rows.
  for (int n = 0; n < kWidth * kRows; ++n) {
    orig_pixels[n] = static_cast<float>(n);
  }

  // The five input rows sit back to back in the source buffer.
  float* const row0 = orig_pixels;
  float* const row1 = orig_pixels + kWidth;
  float* const row2 = orig_pixels + kWidth * 2;
  float* const row3 = orig_pixels + kWidth * 3;
  float* const row4 = orig_pixels + kWidth * 4;

  GaussCol_F32_C(row0, row1, row2, row3, row4, dst_pixels_c, kWidth);

  for (int iter = 0; iter < benchmark_pixels_div1280_; ++iter) {
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
    const int has_neon = TestCpuFlag(kCpuHasNEON);
    if (!has_neon) {
      GaussCol_F32_C(row0, row1, row2, row3, row4, dst_pixels_opt, kWidth);
    } else {
      GaussCol_F32_NEON(row0, row1, row2, row3, row4, dst_pixels_opt, kWidth);
    }
#else
    GaussCol_F32_C(row0, row1, row2, row3, row4, dst_pixels_opt, kWidth);
#endif
  }

  for (int i = 0; i < kWidth; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }
  free_aligned_buffer_page_end(orig_pixels_buf);
}
4261
// Swaps the two bytes of every pair in a random buffer with the best
// available SwapUVRow variant and verifies each output pair is the input pair
// reversed.
TEST_F(LibYUVPlanarTest, SwapUVRow) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kBytes = kPixels * 2;  // two bytes per pair

  align_buffer_page_end(src_pixels_vu, kBytes);
  align_buffer_page_end(dst_pixels_uv, kBytes);
  MemRandomize(src_pixels_vu, kBytes);
  memset(dst_pixels_uv, 1, kBytes);

  // Start from the C row; upgrade to NEON when built in and available. The
  // plain NEON row is used only for counts that are a multiple of 16,
  // otherwise the Any_NEON row is used.
  void (*SwapUVRow)(const uint8_t* src_uv, uint8_t* dst_vu, int width) =
      SwapUVRow_C;
#if defined(HAS_SWAPUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SwapUVRow = IS_ALIGNED(kPixels, 16) ? SwapUVRow_NEON : SwapUVRow_Any_NEON;
  }
#endif

  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    SwapUVRow(src_pixels_vu, dst_pixels_uv, kPixels);
  }

  for (int i = 0; i < kPixels; ++i) {
    EXPECT_EQ(dst_pixels_uv[i * 2 + 0], src_pixels_vu[i * 2 + 1]);
    EXPECT_EQ(dst_pixels_uv[i * 2 + 1], src_pixels_vu[i * 2 + 0]);
  }

  free_aligned_buffer_page_end(src_pixels_vu);
  free_aligned_buffer_page_end(dst_pixels_uv);
}
4292 #endif // ENABLE_ROW_TESTS
4293
// Runs GaussPlane_F32 once with disable_cpu_flags_ applied to produce the
// reference, then benchmark_iterations_ times with benchmark_cpu_info_, and
// requires every output sample to be within 1.0 of the reference.
TEST_F(LibYUVPlanarTest, TestGaussPlane_F32) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kSize = kPixels * 4;  // 4 bytes per float sample
  align_buffer_page_end(orig_pixels, kSize);
  align_buffer_page_end(dst_pixels_opt, kSize);
  align_buffer_page_end(dst_pixels_c, kSize);

  // Typed views of the byte buffers. reinterpret_cast is used instead of
  // C-style casts, matching the other float tests in this file.
  float* src_f32 = reinterpret_cast<float*>(orig_pixels);
  float* dst_f32_c = reinterpret_cast<float*>(dst_pixels_c);
  float* dst_f32_opt = reinterpret_cast<float*>(dst_pixels_opt);

  // Repeating ramp with period 1024, scaled by 3.14.
  for (int i = 0; i < kPixels; ++i) {
    src_f32[i] = (i & 1023) * 3.14f;
  }
  // Distinct fill bytes so an unwritten output cannot match by accident.
  memset(dst_pixels_opt, 1, kSize);
  memset(dst_pixels_c, 2, kSize);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  GaussPlane_F32(src_f32, benchmark_width_, dst_f32_c, benchmark_width_,
                 benchmark_width_, benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  // Benchmarked pass.
  for (int i = 0; i < benchmark_iterations_; ++i) {
    GaussPlane_F32(src_f32, benchmark_width_, dst_f32_opt, benchmark_width_,
                   benchmark_width_, benchmark_height_);
  }
  for (int i = 0; i < kPixels; ++i) {
    EXPECT_NEAR(dst_f32_c[i], dst_f32_opt[i], 1.f) << i;
  }

  free_aligned_buffer_page_end(dst_pixels_c);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(orig_pixels);
}
4326
// Runs HalfMergeUVPlane once with disable_cpu_flags_ applied to produce the
// reference, then benchmark_iterations_ times with benchmark_cpu_info_, and
// requires byte-identical interleaved output.
TEST_F(LibYUVPlanarTest, HalfMergeUVPlane_Opt) {
  // Output has half the source dimensions, rounded up, with two bytes per
  // output position.
  int dst_width = (benchmark_width_ + 1) / 2;
  int dst_height = (benchmark_height_ + 1) / 2;
  align_buffer_page_end(src_pixels_u, benchmark_width_ * benchmark_height_);
  align_buffer_page_end(src_pixels_v, benchmark_width_ * benchmark_height_);
  align_buffer_page_end(dst_pixels_uv_opt, dst_width * 2 * dst_height);
  align_buffer_page_end(dst_pixels_uv_c, dst_width * 2 * dst_height);

  // Randomize the destinations as well so stale contents cannot make the two
  // outputs agree by accident.
  MemRandomize(src_pixels_u, benchmark_width_ * benchmark_height_);
  MemRandomize(src_pixels_v, benchmark_width_ * benchmark_height_);
  MemRandomize(dst_pixels_uv_opt, dst_width * 2 * dst_height);
  MemRandomize(dst_pixels_uv_c, dst_width * 2 * dst_height);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  HalfMergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v,
                   benchmark_width_, dst_pixels_uv_c, dst_width * 2,
                   benchmark_width_, benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  // Benchmarked pass.
  for (int i = 0; i < benchmark_iterations_; ++i) {
    HalfMergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v,
                     benchmark_width_, dst_pixels_uv_opt, dst_width * 2,
                     benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < dst_width * 2 * dst_height; ++i) {
    EXPECT_EQ(dst_pixels_uv_c[i], dst_pixels_uv_opt[i]);
  }

  free_aligned_buffer_page_end(src_pixels_u);
  free_aligned_buffer_page_end(src_pixels_v);
  free_aligned_buffer_page_end(dst_pixels_uv_opt);
  free_aligned_buffer_page_end(dst_pixels_uv_c);
}
4367
// Copies a random Y plane and interleaved UV plane with NV12Copy and checks
// that both destination planes equal their sources byte for byte.
TEST_F(LibYUVPlanarTest, NV12Copy) {
  const int kYStride = benchmark_width_;
  const int kUVStride = ((benchmark_width_ + 1) >> 1) * 2;
  const int kUVRows = (benchmark_height_ + 1) >> 1;
  const int kYBytes = benchmark_width_ * benchmark_height_;
  const int kUVBytes = kUVStride * kUVRows;

  align_buffer_page_end(src_y, kYBytes);
  align_buffer_page_end(src_uv, kUVBytes);
  align_buffer_page_end(dst_y, kYBytes);
  align_buffer_page_end(dst_uv, kUVBytes);

  // Destinations start out random too, so a copy that does nothing fails.
  MemRandomize(src_y, kYBytes);
  MemRandomize(src_uv, kUVBytes);
  MemRandomize(dst_y, kYBytes);
  MemRandomize(dst_uv, kUVBytes);

  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    NV12Copy(src_y, kYStride, src_uv, kUVStride, dst_y, kYStride, dst_uv,
             kUVStride, benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < kYBytes; ++i) {
    EXPECT_EQ(src_y[i], dst_y[i]);
  }
  for (int i = 0; i < kUVBytes; ++i) {
    EXPECT_EQ(src_uv[i], dst_uv[i]);
  }

  free_aligned_buffer_page_end(src_y);
  free_aligned_buffer_page_end(src_uv);
  free_aligned_buffer_page_end(dst_y);
  free_aligned_buffer_page_end(dst_uv);
}
4399
// Copies a random Y plane and interleaved VU plane with NV21Copy and checks
// that both destination planes equal their sources byte for byte.
TEST_F(LibYUVPlanarTest, NV21Copy) {
  const int kYStride = benchmark_width_;
  const int kVUStride = ((benchmark_width_ + 1) >> 1) * 2;
  const int kVURows = (benchmark_height_ + 1) >> 1;
  const int kYBytes = benchmark_width_ * benchmark_height_;
  const int kVUBytes = kVUStride * kVURows;

  align_buffer_page_end(src_y, kYBytes);
  align_buffer_page_end(src_vu, kVUBytes);
  align_buffer_page_end(dst_y, kYBytes);
  align_buffer_page_end(dst_vu, kVUBytes);

  // Destinations start out random too, so a copy that does nothing fails.
  MemRandomize(src_y, kYBytes);
  MemRandomize(src_vu, kVUBytes);
  MemRandomize(dst_y, kYBytes);
  MemRandomize(dst_vu, kVUBytes);

  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    NV21Copy(src_y, kYStride, src_vu, kVUStride, dst_y, kYStride, dst_vu,
             kVUStride, benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < kYBytes; ++i) {
    EXPECT_EQ(src_y[i], dst_y[i]);
  }
  for (int i = 0; i < kVUBytes; ++i) {
    EXPECT_EQ(src_vu[i], dst_vu[i]);
  }

  free_aligned_buffer_page_end(src_y);
  free_aligned_buffer_page_end(src_vu);
  free_aligned_buffer_page_end(dst_y);
  free_aligned_buffer_page_end(dst_vu);
}
4431
4432 } // namespace libyuv
4433