• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS. All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <math.h>
12 #include <stdlib.h>
13 #include <time.h>
14 
15 #include "../unit_test/unit_test.h"
16 #include "libyuv/compare.h"
17 #include "libyuv/convert.h"
18 #include "libyuv/convert_argb.h"
19 #include "libyuv/convert_from.h"
20 #include "libyuv/convert_from_argb.h"
21 #include "libyuv/cpu_id.h"
22 #include "libyuv/planar_functions.h"
23 #include "libyuv/rotate.h"
24 #include "libyuv/scale.h"
25 
26 #ifdef ENABLE_ROW_TESTS
27 // row.h defines SIMD_ALIGNED, overriding unit_test.h
28 // TODO(fbarchard): Remove row.h from unittests.  Test public functions.
29 #include "libyuv/row.h" /* For ScaleSumSamples_Neon */
30 #endif
31 
32 #if defined(LIBYUV_BIT_EXACT)
33 #define EXPECTED_ATTENUATE_DIFF 0
34 #else
35 #define EXPECTED_ATTENUATE_DIFF 2
36 #endif
37 
38 namespace libyuv {
39 
TEST_F(LibYUVPlanarTest,TestAttenuate)40 TEST_F(LibYUVPlanarTest, TestAttenuate) {
41   const int kSize = 1280 * 4;
42   align_buffer_page_end(orig_pixels, kSize);
43   align_buffer_page_end(atten_pixels, kSize);
44   align_buffer_page_end(unatten_pixels, kSize);
45   align_buffer_page_end(atten2_pixels, kSize);
46 
47   // Test unattenuation clamps
48   orig_pixels[0 * 4 + 0] = 200u;
49   orig_pixels[0 * 4 + 1] = 129u;
50   orig_pixels[0 * 4 + 2] = 127u;
51   orig_pixels[0 * 4 + 3] = 128u;
52   // Test unattenuation transparent and opaque are unaffected
53   orig_pixels[1 * 4 + 0] = 16u;
54   orig_pixels[1 * 4 + 1] = 64u;
55   orig_pixels[1 * 4 + 2] = 192u;
56   orig_pixels[1 * 4 + 3] = 0u;
57   orig_pixels[2 * 4 + 0] = 16u;
58   orig_pixels[2 * 4 + 1] = 64u;
59   orig_pixels[2 * 4 + 2] = 192u;
60   orig_pixels[2 * 4 + 3] = 255u;
61   orig_pixels[3 * 4 + 0] = 16u;
62   orig_pixels[3 * 4 + 1] = 64u;
63   orig_pixels[3 * 4 + 2] = 192u;
64   orig_pixels[3 * 4 + 3] = 128u;
65   ARGBUnattenuate(orig_pixels, 0, unatten_pixels, 0, 4, 1);
66   EXPECT_EQ(255u, unatten_pixels[0 * 4 + 0]);
67   EXPECT_EQ(255u, unatten_pixels[0 * 4 + 1]);
68   EXPECT_EQ(254u, unatten_pixels[0 * 4 + 2]);
69   EXPECT_EQ(128u, unatten_pixels[0 * 4 + 3]);
70   EXPECT_EQ(0u, unatten_pixels[1 * 4 + 0]);
71   EXPECT_EQ(0u, unatten_pixels[1 * 4 + 1]);
72   EXPECT_EQ(0u, unatten_pixels[1 * 4 + 2]);
73   EXPECT_EQ(0u, unatten_pixels[1 * 4 + 3]);
74   EXPECT_EQ(16u, unatten_pixels[2 * 4 + 0]);
75   EXPECT_EQ(64u, unatten_pixels[2 * 4 + 1]);
76   EXPECT_EQ(192u, unatten_pixels[2 * 4 + 2]);
77   EXPECT_EQ(255u, unatten_pixels[2 * 4 + 3]);
78   EXPECT_EQ(32u, unatten_pixels[3 * 4 + 0]);
79   EXPECT_EQ(128u, unatten_pixels[3 * 4 + 1]);
80   EXPECT_EQ(255u, unatten_pixels[3 * 4 + 2]);
81   EXPECT_EQ(128u, unatten_pixels[3 * 4 + 3]);
82 
83   for (int i = 0; i < 1280; ++i) {
84     orig_pixels[i * 4 + 0] = i;
85     orig_pixels[i * 4 + 1] = i / 2;
86     orig_pixels[i * 4 + 2] = i / 3;
87     orig_pixels[i * 4 + 3] = i;
88   }
89   ARGBAttenuate(orig_pixels, 0, atten_pixels, 0, 1280, 1);
90   ARGBUnattenuate(atten_pixels, 0, unatten_pixels, 0, 1280, 1);
91   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
92     ARGBAttenuate(unatten_pixels, 0, atten2_pixels, 0, 1280, 1);
93   }
94   for (int i = 0; i < 1280; ++i) {
95     EXPECT_NEAR(atten_pixels[i * 4 + 0], atten2_pixels[i * 4 + 0], 2);
96     EXPECT_NEAR(atten_pixels[i * 4 + 1], atten2_pixels[i * 4 + 1], 2);
97     EXPECT_NEAR(atten_pixels[i * 4 + 2], atten2_pixels[i * 4 + 2], 2);
98     EXPECT_NEAR(atten_pixels[i * 4 + 3], atten2_pixels[i * 4 + 3], 2);
99   }
100   // Make sure transparent, 50% and opaque are fully accurate.
101   EXPECT_EQ(0, atten_pixels[0 * 4 + 0]);
102   EXPECT_EQ(0, atten_pixels[0 * 4 + 1]);
103   EXPECT_EQ(0, atten_pixels[0 * 4 + 2]);
104   EXPECT_EQ(0, atten_pixels[0 * 4 + 3]);
105   EXPECT_EQ(64, atten_pixels[128 * 4 + 0]);
106   EXPECT_EQ(32, atten_pixels[128 * 4 + 1]);
107   EXPECT_EQ(21, atten_pixels[128 * 4 + 2]);
108   EXPECT_EQ(128, atten_pixels[128 * 4 + 3]);
109   EXPECT_NEAR(254, atten_pixels[255 * 4 + 0], EXPECTED_ATTENUATE_DIFF);
110   EXPECT_NEAR(127, atten_pixels[255 * 4 + 1], EXPECTED_ATTENUATE_DIFF);
111   EXPECT_NEAR(85, atten_pixels[255 * 4 + 2], EXPECTED_ATTENUATE_DIFF);
112   EXPECT_EQ(255, atten_pixels[255 * 4 + 3]);
113 
114   free_aligned_buffer_page_end(atten2_pixels);
115   free_aligned_buffer_page_end(unatten_pixels);
116   free_aligned_buffer_page_end(atten_pixels);
117   free_aligned_buffer_page_end(orig_pixels);
118 }
119 
TestAttenuateI(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)120 static int TestAttenuateI(int width,
121                           int height,
122                           int benchmark_iterations,
123                           int disable_cpu_flags,
124                           int benchmark_cpu_info,
125                           int invert,
126                           int off) {
127   if (width < 1) {
128     width = 1;
129   }
130   const int kBpp = 4;
131   const int kStride = width * kBpp;
132   align_buffer_page_end(src_argb, kStride * height + off);
133   align_buffer_page_end(dst_argb_c, kStride * height);
134   align_buffer_page_end(dst_argb_opt, kStride * height);
135   for (int i = 0; i < kStride * height; ++i) {
136     src_argb[i + off] = (fastrand() & 0xff);
137   }
138   memset(dst_argb_c, 0, kStride * height);
139   memset(dst_argb_opt, 0, kStride * height);
140 
141   MaskCpuFlags(disable_cpu_flags);
142   ARGBAttenuate(src_argb + off, kStride, dst_argb_c, kStride, width,
143                 invert * height);
144   MaskCpuFlags(benchmark_cpu_info);
145   for (int i = 0; i < benchmark_iterations; ++i) {
146     ARGBAttenuate(src_argb + off, kStride, dst_argb_opt, kStride, width,
147                   invert * height);
148   }
149   int max_diff = 0;
150   for (int i = 0; i < kStride * height; ++i) {
151     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
152                        static_cast<int>(dst_argb_opt[i]));
153     if (abs_diff > max_diff) {
154       max_diff = abs_diff;
155     }
156   }
157   free_aligned_buffer_page_end(src_argb);
158   free_aligned_buffer_page_end(dst_argb_c);
159   free_aligned_buffer_page_end(dst_argb_opt);
160   return max_diff;
161 }
162 
// ARGBAttenuate on an image one pixel wider than the benchmark width; the two
// passes of TestAttenuateI must agree within EXPECTED_ATTENUATE_DIFF.
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Any) {
  const int kWidth = benchmark_width_ + 1;
  const int kInvert = +1;  // Height is passed through unchanged.
  const int kOffset = 0;   // Source starts at the buffer start.
  const int max_diff = TestAttenuateI(
      kWidth, benchmark_height_, benchmark_iterations_, disable_cpu_flags_,
      benchmark_cpu_info_, kInvert, kOffset);

  EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
170 
// ARGBAttenuate with the source pointer shifted by one byte; the two passes
// of TestAttenuateI must agree within EXPECTED_ATTENUATE_DIFF.
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Unaligned) {
  const int kInvert = +1;  // Height is passed through unchanged.
  const int kOffset = 1;   // Source starts one byte into the buffer.
  const int max_diff = TestAttenuateI(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
177 
// ARGBAttenuate called with a negated height; the two passes of
// TestAttenuateI must agree within EXPECTED_ATTENUATE_DIFF.
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Invert) {
  const int kInvert = -1;  // Height is negated before the call.
  const int kOffset = 0;   // Source starts at the buffer start.
  const int max_diff = TestAttenuateI(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
184 
// ARGBAttenuate at the plain benchmark size with no offset and positive
// height; the two passes of TestAttenuateI must agree within
// EXPECTED_ATTENUATE_DIFF.
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Opt) {
  const int kInvert = +1;  // Height is passed through unchanged.
  const int kOffset = 0;   // Source starts at the buffer start.
  const int max_diff = TestAttenuateI(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
191 
TestUnattenuateI(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)192 static int TestUnattenuateI(int width,
193                             int height,
194                             int benchmark_iterations,
195                             int disable_cpu_flags,
196                             int benchmark_cpu_info,
197                             int invert,
198                             int off) {
199   if (width < 1) {
200     width = 1;
201   }
202   const int kBpp = 4;
203   const int kStride = width * kBpp;
204   align_buffer_page_end(src_argb, kStride * height + off);
205   align_buffer_page_end(dst_argb_c, kStride * height);
206   align_buffer_page_end(dst_argb_opt, kStride * height);
207   for (int i = 0; i < kStride * height; ++i) {
208     src_argb[i + off] = (fastrand() & 0xff);
209   }
210   ARGBAttenuate(src_argb + off, kStride, src_argb + off, kStride, width,
211                 height);
212   memset(dst_argb_c, 0, kStride * height);
213   memset(dst_argb_opt, 0, kStride * height);
214 
215   MaskCpuFlags(disable_cpu_flags);
216   ARGBUnattenuate(src_argb + off, kStride, dst_argb_c, kStride, width,
217                   invert * height);
218   MaskCpuFlags(benchmark_cpu_info);
219   for (int i = 0; i < benchmark_iterations; ++i) {
220     ARGBUnattenuate(src_argb + off, kStride, dst_argb_opt, kStride, width,
221                     invert * height);
222   }
223   int max_diff = 0;
224   for (int i = 0; i < kStride * height; ++i) {
225     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
226                        static_cast<int>(dst_argb_opt[i]));
227     if (abs_diff > max_diff) {
228       max_diff = abs_diff;
229     }
230   }
231   free_aligned_buffer_page_end(src_argb);
232   free_aligned_buffer_page_end(dst_argb_c);
233   free_aligned_buffer_page_end(dst_argb_opt);
234   return max_diff;
235 }
236 
// ARGBUnattenuate on an image one pixel wider than the benchmark width; the
// two passes of TestUnattenuateI must agree within EXPECTED_ATTENUATE_DIFF.
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Any) {
  const int kWidth = benchmark_width_ + 1;
  const int kInvert = +1;  // Height is passed through unchanged.
  const int kOffset = 0;   // Source starts at the buffer start.
  const int max_diff = TestUnattenuateI(
      kWidth, benchmark_height_, benchmark_iterations_, disable_cpu_flags_,
      benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
243 
// ARGBUnattenuate with the source pointer shifted by one byte; the two passes
// of TestUnattenuateI must agree within EXPECTED_ATTENUATE_DIFF.
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Unaligned) {
  const int kInvert = +1;  // Height is passed through unchanged.
  const int kOffset = 1;   // Source starts one byte into the buffer.
  const int max_diff = TestUnattenuateI(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
250 
// ARGBUnattenuate called with a negated height; the two passes of
// TestUnattenuateI must agree within EXPECTED_ATTENUATE_DIFF.
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Invert) {
  const int kInvert = -1;  // Height is negated before the call.
  const int kOffset = 0;   // Source starts at the buffer start.
  const int max_diff = TestUnattenuateI(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
257 
// ARGBUnattenuate at the plain benchmark size with no offset and positive
// height; the two passes of TestUnattenuateI must agree within
// EXPECTED_ATTENUATE_DIFF.
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Opt) {
  const int kInvert = +1;  // Height is passed through unchanged.
  const int kOffset = 0;   // Source starts at the buffer start.
  const int max_diff = TestUnattenuateI(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
264 
// Fills a 16x16 image with the constant pixel (1, 2, 3, 255) and checks that
// ARGBComputeCumulativeSum yields, for each channel at (x, y), the channel
// value times the (x + 1) * (y + 1) pixels up to and including that position.
TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) {
  const int kDim = 16;
  SIMD_ALIGNED(uint8_t orig_pixels[kDim][kDim][4]);
  SIMD_ALIGNED(int32_t added_pixels[kDim][kDim][4]);

  static const uint8_t kPixel[4] = {1u, 2u, 3u, 255u};
  for (int row = 0; row < kDim; ++row) {
    for (int col = 0; col < kDim; ++col) {
      for (int c = 0; c < 4; ++c) {
        orig_pixels[row][col][c] = kPixel[c];
      }
    }
  }

  ARGBComputeCumulativeSum(&orig_pixels[0][0][0], kDim * 4,
                           &added_pixels[0][0][0], kDim * 4, kDim, kDim);

  for (int row = 0; row < kDim; ++row) {
    for (int col = 0; col < kDim; ++col) {
      // Number of pixels summed into position (col, row).
      const int area = (col + 1) * (row + 1);
      for (int c = 0; c < 4; ++c) {
        EXPECT_EQ(area * kPixel[c], added_pixels[row][col][c]);
      }
    }
  }
}
290 
291 // near is for legacy platforms.
TEST_F(LibYUVPlanarTest,TestARGBGray)292 TEST_F(LibYUVPlanarTest, TestARGBGray) {
293   SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
294   memset(orig_pixels, 0, sizeof(orig_pixels));
295 
296   // Test blue
297   orig_pixels[0][0] = 255u;
298   orig_pixels[0][1] = 0u;
299   orig_pixels[0][2] = 0u;
300   orig_pixels[0][3] = 128u;
301   // Test green
302   orig_pixels[1][0] = 0u;
303   orig_pixels[1][1] = 255u;
304   orig_pixels[1][2] = 0u;
305   orig_pixels[1][3] = 0u;
306   // Test red
307   orig_pixels[2][0] = 0u;
308   orig_pixels[2][1] = 0u;
309   orig_pixels[2][2] = 255u;
310   orig_pixels[2][3] = 255u;
311   // Test black
312   orig_pixels[3][0] = 0u;
313   orig_pixels[3][1] = 0u;
314   orig_pixels[3][2] = 0u;
315   orig_pixels[3][3] = 255u;
316   // Test white
317   orig_pixels[4][0] = 255u;
318   orig_pixels[4][1] = 255u;
319   orig_pixels[4][2] = 255u;
320   orig_pixels[4][3] = 255u;
321   // Test color
322   orig_pixels[5][0] = 16u;
323   orig_pixels[5][1] = 64u;
324   orig_pixels[5][2] = 192u;
325   orig_pixels[5][3] = 224u;
326   // Do 16 to test asm version.
327   ARGBGray(&orig_pixels[0][0], 0, 0, 0, 16, 1);
328   EXPECT_NEAR(29u, orig_pixels[0][0], 1);
329   EXPECT_NEAR(29u, orig_pixels[0][1], 1);
330   EXPECT_NEAR(29u, orig_pixels[0][2], 1);
331   EXPECT_EQ(128u, orig_pixels[0][3]);
332   EXPECT_EQ(149u, orig_pixels[1][0]);
333   EXPECT_EQ(149u, orig_pixels[1][1]);
334   EXPECT_EQ(149u, orig_pixels[1][2]);
335   EXPECT_EQ(0u, orig_pixels[1][3]);
336   EXPECT_NEAR(77u, orig_pixels[2][0], 1);
337   EXPECT_NEAR(77u, orig_pixels[2][1], 1);
338   EXPECT_NEAR(77u, orig_pixels[2][2], 1);
339   EXPECT_EQ(255u, orig_pixels[2][3]);
340   EXPECT_EQ(0u, orig_pixels[3][0]);
341   EXPECT_EQ(0u, orig_pixels[3][1]);
342   EXPECT_EQ(0u, orig_pixels[3][2]);
343   EXPECT_EQ(255u, orig_pixels[3][3]);
344   EXPECT_EQ(255u, orig_pixels[4][0]);
345   EXPECT_EQ(255u, orig_pixels[4][1]);
346   EXPECT_EQ(255u, orig_pixels[4][2]);
347   EXPECT_EQ(255u, orig_pixels[4][3]);
348   EXPECT_NEAR(97u, orig_pixels[5][0], 1);
349   EXPECT_NEAR(97u, orig_pixels[5][1], 1);
350   EXPECT_NEAR(97u, orig_pixels[5][2], 1);
351   EXPECT_EQ(224u, orig_pixels[5][3]);
352   for (int i = 0; i < 1280; ++i) {
353     orig_pixels[i][0] = i;
354     orig_pixels[i][1] = i / 2;
355     orig_pixels[i][2] = i / 3;
356     orig_pixels[i][3] = i;
357   }
358   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
359     ARGBGray(&orig_pixels[0][0], 0, 0, 0, 1280, 1);
360   }
361 }
362 
TEST_F(LibYUVPlanarTest,TestARGBGrayTo)363 TEST_F(LibYUVPlanarTest, TestARGBGrayTo) {
364   SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
365   SIMD_ALIGNED(uint8_t gray_pixels[1280][4]);
366   memset(orig_pixels, 0, sizeof(orig_pixels));
367 
368   // Test blue
369   orig_pixels[0][0] = 255u;
370   orig_pixels[0][1] = 0u;
371   orig_pixels[0][2] = 0u;
372   orig_pixels[0][3] = 128u;
373   // Test green
374   orig_pixels[1][0] = 0u;
375   orig_pixels[1][1] = 255u;
376   orig_pixels[1][2] = 0u;
377   orig_pixels[1][3] = 0u;
378   // Test red
379   orig_pixels[2][0] = 0u;
380   orig_pixels[2][1] = 0u;
381   orig_pixels[2][2] = 255u;
382   orig_pixels[2][3] = 255u;
383   // Test black
384   orig_pixels[3][0] = 0u;
385   orig_pixels[3][1] = 0u;
386   orig_pixels[3][2] = 0u;
387   orig_pixels[3][3] = 255u;
388   // Test white
389   orig_pixels[4][0] = 255u;
390   orig_pixels[4][1] = 255u;
391   orig_pixels[4][2] = 255u;
392   orig_pixels[4][3] = 255u;
393   // Test color
394   orig_pixels[5][0] = 16u;
395   orig_pixels[5][1] = 64u;
396   orig_pixels[5][2] = 192u;
397   orig_pixels[5][3] = 224u;
398   // Do 16 to test asm version.
399   ARGBGrayTo(&orig_pixels[0][0], 0, &gray_pixels[0][0], 0, 16, 1);
400   EXPECT_NEAR(30u, gray_pixels[0][0], 1);
401   EXPECT_NEAR(30u, gray_pixels[0][1], 1);
402   EXPECT_NEAR(30u, gray_pixels[0][2], 1);
403   EXPECT_NEAR(128u, gray_pixels[0][3], 1);
404   EXPECT_NEAR(149u, gray_pixels[1][0], 1);
405   EXPECT_NEAR(149u, gray_pixels[1][1], 1);
406   EXPECT_NEAR(149u, gray_pixels[1][2], 1);
407   EXPECT_NEAR(0u, gray_pixels[1][3], 1);
408   EXPECT_NEAR(76u, gray_pixels[2][0], 1);
409   EXPECT_NEAR(76u, gray_pixels[2][1], 1);
410   EXPECT_NEAR(76u, gray_pixels[2][2], 1);
411   EXPECT_NEAR(255u, gray_pixels[2][3], 1);
412   EXPECT_NEAR(0u, gray_pixels[3][0], 1);
413   EXPECT_NEAR(0u, gray_pixels[3][1], 1);
414   EXPECT_NEAR(0u, gray_pixels[3][2], 1);
415   EXPECT_NEAR(255u, gray_pixels[3][3], 1);
416   EXPECT_NEAR(255u, gray_pixels[4][0], 1);
417   EXPECT_NEAR(255u, gray_pixels[4][1], 1);
418   EXPECT_NEAR(255u, gray_pixels[4][2], 1);
419   EXPECT_NEAR(255u, gray_pixels[4][3], 1);
420   EXPECT_NEAR(96u, gray_pixels[5][0], 1);
421   EXPECT_NEAR(96u, gray_pixels[5][1], 1);
422   EXPECT_NEAR(96u, gray_pixels[5][2], 1);
423   EXPECT_NEAR(224u, gray_pixels[5][3], 1);
424   for (int i = 0; i < 1280; ++i) {
425     orig_pixels[i][0] = i;
426     orig_pixels[i][1] = i / 2;
427     orig_pixels[i][2] = i / 3;
428     orig_pixels[i][3] = i;
429   }
430   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
431     ARGBGrayTo(&orig_pixels[0][0], 0, &gray_pixels[0][0], 0, 1280, 1);
432   }
433 
434   for (int i = 0; i < 256; ++i) {
435     orig_pixels[i][0] = i;
436     orig_pixels[i][1] = i;
437     orig_pixels[i][2] = i;
438     orig_pixels[i][3] = i;
439   }
440   ARGBGray(&orig_pixels[0][0], 0, 0, 0, 256, 1);
441   for (int i = 0; i < 256; ++i) {
442     EXPECT_EQ(i, orig_pixels[i][0]);
443     EXPECT_EQ(i, orig_pixels[i][1]);
444     EXPECT_EQ(i, orig_pixels[i][2]);
445     EXPECT_EQ(i, orig_pixels[i][3]);
446   }
447 }
448 
TEST_F(LibYUVPlanarTest,TestARGBSepia)449 TEST_F(LibYUVPlanarTest, TestARGBSepia) {
450   SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
451   memset(orig_pixels, 0, sizeof(orig_pixels));
452 
453   // Test blue
454   orig_pixels[0][0] = 255u;
455   orig_pixels[0][1] = 0u;
456   orig_pixels[0][2] = 0u;
457   orig_pixels[0][3] = 128u;
458   // Test green
459   orig_pixels[1][0] = 0u;
460   orig_pixels[1][1] = 255u;
461   orig_pixels[1][2] = 0u;
462   orig_pixels[1][3] = 0u;
463   // Test red
464   orig_pixels[2][0] = 0u;
465   orig_pixels[2][1] = 0u;
466   orig_pixels[2][2] = 255u;
467   orig_pixels[2][3] = 255u;
468   // Test black
469   orig_pixels[3][0] = 0u;
470   orig_pixels[3][1] = 0u;
471   orig_pixels[3][2] = 0u;
472   orig_pixels[3][3] = 255u;
473   // Test white
474   orig_pixels[4][0] = 255u;
475   orig_pixels[4][1] = 255u;
476   orig_pixels[4][2] = 255u;
477   orig_pixels[4][3] = 255u;
478   // Test color
479   orig_pixels[5][0] = 16u;
480   orig_pixels[5][1] = 64u;
481   orig_pixels[5][2] = 192u;
482   orig_pixels[5][3] = 224u;
483   // Do 16 to test asm version.
484   ARGBSepia(&orig_pixels[0][0], 0, 0, 0, 16, 1);
485   EXPECT_EQ(33u, orig_pixels[0][0]);
486   EXPECT_EQ(43u, orig_pixels[0][1]);
487   EXPECT_EQ(47u, orig_pixels[0][2]);
488   EXPECT_EQ(128u, orig_pixels[0][3]);
489   EXPECT_EQ(135u, orig_pixels[1][0]);
490   EXPECT_EQ(175u, orig_pixels[1][1]);
491   EXPECT_EQ(195u, orig_pixels[1][2]);
492   EXPECT_EQ(0u, orig_pixels[1][3]);
493   EXPECT_EQ(69u, orig_pixels[2][0]);
494   EXPECT_EQ(89u, orig_pixels[2][1]);
495   EXPECT_EQ(99u, orig_pixels[2][2]);
496   EXPECT_EQ(255u, orig_pixels[2][3]);
497   EXPECT_EQ(0u, orig_pixels[3][0]);
498   EXPECT_EQ(0u, orig_pixels[3][1]);
499   EXPECT_EQ(0u, orig_pixels[3][2]);
500   EXPECT_EQ(255u, orig_pixels[3][3]);
501   EXPECT_EQ(239u, orig_pixels[4][0]);
502   EXPECT_EQ(255u, orig_pixels[4][1]);
503   EXPECT_EQ(255u, orig_pixels[4][2]);
504   EXPECT_EQ(255u, orig_pixels[4][3]);
505   EXPECT_EQ(88u, orig_pixels[5][0]);
506   EXPECT_EQ(114u, orig_pixels[5][1]);
507   EXPECT_EQ(127u, orig_pixels[5][2]);
508   EXPECT_EQ(224u, orig_pixels[5][3]);
509 
510   for (int i = 0; i < 1280; ++i) {
511     orig_pixels[i][0] = i;
512     orig_pixels[i][1] = i / 2;
513     orig_pixels[i][2] = i / 3;
514     orig_pixels[i][3] = i;
515   }
516   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
517     ARGBSepia(&orig_pixels[0][0], 0, 0, 0, 1280, 1);
518   }
519 }
520 
TEST_F(LibYUVPlanarTest,TestARGBColorMatrix)521 TEST_F(LibYUVPlanarTest, TestARGBColorMatrix) {
522   SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
523   SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]);
524   SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]);
525 
526   // Matrix for Sepia.
527   SIMD_ALIGNED(static const int8_t kRGBToSepia[]) = {
528       17 / 2, 68 / 2, 35 / 2, 0, 22 / 2, 88 / 2, 45 / 2, 0,
529       24 / 2, 98 / 2, 50 / 2, 0, 0,      0,      0,      64,  // Copy alpha.
530   };
531   memset(orig_pixels, 0, sizeof(orig_pixels));
532 
533   // Test blue
534   orig_pixels[0][0] = 255u;
535   orig_pixels[0][1] = 0u;
536   orig_pixels[0][2] = 0u;
537   orig_pixels[0][3] = 128u;
538   // Test green
539   orig_pixels[1][0] = 0u;
540   orig_pixels[1][1] = 255u;
541   orig_pixels[1][2] = 0u;
542   orig_pixels[1][3] = 0u;
543   // Test red
544   orig_pixels[2][0] = 0u;
545   orig_pixels[2][1] = 0u;
546   orig_pixels[2][2] = 255u;
547   orig_pixels[2][3] = 255u;
548   // Test color
549   orig_pixels[3][0] = 16u;
550   orig_pixels[3][1] = 64u;
551   orig_pixels[3][2] = 192u;
552   orig_pixels[3][3] = 224u;
553   // Do 16 to test asm version.
554   ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
555                   &kRGBToSepia[0], 16, 1);
556   EXPECT_EQ(31u, dst_pixels_opt[0][0]);
557   EXPECT_EQ(43u, dst_pixels_opt[0][1]);
558   EXPECT_EQ(47u, dst_pixels_opt[0][2]);
559   EXPECT_EQ(128u, dst_pixels_opt[0][3]);
560   EXPECT_EQ(135u, dst_pixels_opt[1][0]);
561   EXPECT_EQ(175u, dst_pixels_opt[1][1]);
562   EXPECT_EQ(195u, dst_pixels_opt[1][2]);
563   EXPECT_EQ(0u, dst_pixels_opt[1][3]);
564   EXPECT_EQ(67u, dst_pixels_opt[2][0]);
565   EXPECT_EQ(87u, dst_pixels_opt[2][1]);
566   EXPECT_EQ(99u, dst_pixels_opt[2][2]);
567   EXPECT_EQ(255u, dst_pixels_opt[2][3]);
568   EXPECT_EQ(87u, dst_pixels_opt[3][0]);
569   EXPECT_EQ(112u, dst_pixels_opt[3][1]);
570   EXPECT_EQ(127u, dst_pixels_opt[3][2]);
571   EXPECT_EQ(224u, dst_pixels_opt[3][3]);
572 
573   for (int i = 0; i < 1280; ++i) {
574     orig_pixels[i][0] = i;
575     orig_pixels[i][1] = i / 2;
576     orig_pixels[i][2] = i / 3;
577     orig_pixels[i][3] = i;
578   }
579   MaskCpuFlags(disable_cpu_flags_);
580   ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0,
581                   &kRGBToSepia[0], 1280, 1);
582   MaskCpuFlags(benchmark_cpu_info_);
583 
584   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
585     ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
586                     &kRGBToSepia[0], 1280, 1);
587   }
588 
589   for (int i = 0; i < 1280; ++i) {
590     EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]);
591     EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]);
592     EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]);
593     EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]);
594   }
595 }
596 
// Applies a sepia matrix in place with RGBColorMatrix to four reference
// pixels and requires exact expected bytes (the fourth byte of each pixel is
// expected to be unchanged), then runs the benchmark loop over a 1280 pixel
// ramp.
TEST_F(LibYUVPlanarTest, TestRGBColorMatrix) {
  const int kWidth = 1280;
  const int kCases = 4;
  SIMD_ALIGNED(uint8_t orig_pixels[kWidth][4]);

  // Matrix for Sepia.
  SIMD_ALIGNED(static const int8_t kRGBToSepia[]) = {
      17, 68, 35, 0, 22, 88, 45, 0,
      24, 98, 50, 0, 0,  0,  0,  0,  // Unused but makes matrix 16 bytes.
  };
  memset(orig_pixels, 0, sizeof(orig_pixels));

  static const unsigned int kInput[kCases][4] = {
      {255u, 0u, 0u, 128u},    // Blue.
      {0u, 255u, 0u, 0u},      // Green.
      {0u, 0u, 255u, 255u},    // Red.
      {16u, 64u, 192u, 224u},  // Color.
  };
  static const unsigned int kSepia[kCases][4] = {
      {31u, 43u, 47u, 128u},    // Blue.
      {135u, 175u, 195u, 0u},   // Green.
      {67u, 87u, 99u, 255u},    // Red.
      {87u, 112u, 127u, 224u},  // Color.
  };

  for (int p = 0; p < kCases; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[p][c] = kInput[p][c];
    }
  }
  // Do 16 to test asm version.
  RGBColorMatrix(&orig_pixels[0][0], 0, &kRGBToSepia[0], 0, 0, 16, 1);
  for (int p = 0; p < kCases; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kSepia[p][c], orig_pixels[p][c]);
    }
  }

  // Ramp: every pixel i is (i, i / 2, i / 3, i), truncated to a byte.
  for (int i = 0; i < kWidth; ++i) {
    const int ramp[4] = {i, i / 2, i / 3, i};
    for (int c = 0; c < 4; ++c) {
      orig_pixels[i][c] = ramp[c];
    }
  }
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    RGBColorMatrix(&orig_pixels[0][0], 0, &kRGBToSepia[0], 0, 0, kWidth, 1);
  }
}
656 
// Remaps four reference pixels in place with ARGBColorTable and requires the
// exact expected bytes; all four bytes of each pixel are replaced.  Then runs
// the benchmark loop over a 1280 pixel ramp.
TEST_F(LibYUVPlanarTest, TestARGBColorTable) {
  const int kWidth = 1280;
  const int kCases = 4;
  SIMD_ALIGNED(uint8_t orig_pixels[kWidth][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  // Lookup table.  Only the first 16 bytes are given; the remaining entries
  // are zero-initialized.
  static const uint8_t kARGBTable[256 * 4] = {
      1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u, 16u,
  };

  static const unsigned int kInput[kCases][4] = {
      {0u, 0u, 0u, 0u},
      {1u, 1u, 1u, 1u},
      {2u, 2u, 2u, 2u},
      {0u, 1u, 2u, 3u},
  };
  // Each expected byte is kARGBTable[value * 4 + channel] for the input byte
  // |value| found in that channel.
  static const unsigned int kMapped[kCases][4] = {
      {1u, 2u, 3u, 4u},
      {5u, 6u, 7u, 8u},
      {9u, 10u, 11u, 12u},
      {1u, 6u, 11u, 16u},
  };

  for (int p = 0; p < kCases; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[p][c] = kInput[p][c];
    }
  }
  // Do 16 to test asm version.
  ARGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 16, 1);
  for (int p = 0; p < kCases; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kMapped[p][c], orig_pixels[p][c]);
    }
  }

  // Ramp: every pixel i is (i, i / 2, i / 3, i), truncated to a byte.
  for (int i = 0; i < kWidth; ++i) {
    const int ramp[4] = {i, i / 2, i / 3, i};
    for (int c = 0; c < 4; ++c) {
      orig_pixels[i][c] = ramp[c];
    }
  }
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    ARGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, kWidth, 1);
  }
}
711 
712 // Same as TestARGBColorTable except alpha does not change.
TEST_F(LibYUVPlanarTest,TestRGBColorTable)713 TEST_F(LibYUVPlanarTest, TestRGBColorTable) {
714   SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
715   memset(orig_pixels, 0, sizeof(orig_pixels));
716 
717   // Matrix for Sepia.
718   static const uint8_t kARGBTable[256 * 4] = {
719       1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u, 16u,
720   };
721 
722   orig_pixels[0][0] = 0u;
723   orig_pixels[0][1] = 0u;
724   orig_pixels[0][2] = 0u;
725   orig_pixels[0][3] = 0u;
726   orig_pixels[1][0] = 1u;
727   orig_pixels[1][1] = 1u;
728   orig_pixels[1][2] = 1u;
729   orig_pixels[1][3] = 1u;
730   orig_pixels[2][0] = 2u;
731   orig_pixels[2][1] = 2u;
732   orig_pixels[2][2] = 2u;
733   orig_pixels[2][3] = 2u;
734   orig_pixels[3][0] = 0u;
735   orig_pixels[3][1] = 1u;
736   orig_pixels[3][2] = 2u;
737   orig_pixels[3][3] = 3u;
738   // Do 16 to test asm version.
739   RGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 16, 1);
740   EXPECT_EQ(1u, orig_pixels[0][0]);
741   EXPECT_EQ(2u, orig_pixels[0][1]);
742   EXPECT_EQ(3u, orig_pixels[0][2]);
743   EXPECT_EQ(0u, orig_pixels[0][3]);  // Alpha unchanged.
744   EXPECT_EQ(5u, orig_pixels[1][0]);
745   EXPECT_EQ(6u, orig_pixels[1][1]);
746   EXPECT_EQ(7u, orig_pixels[1][2]);
747   EXPECT_EQ(1u, orig_pixels[1][3]);  // Alpha unchanged.
748   EXPECT_EQ(9u, orig_pixels[2][0]);
749   EXPECT_EQ(10u, orig_pixels[2][1]);
750   EXPECT_EQ(11u, orig_pixels[2][2]);
751   EXPECT_EQ(2u, orig_pixels[2][3]);  // Alpha unchanged.
752   EXPECT_EQ(1u, orig_pixels[3][0]);
753   EXPECT_EQ(6u, orig_pixels[3][1]);
754   EXPECT_EQ(11u, orig_pixels[3][2]);
755   EXPECT_EQ(3u, orig_pixels[3][3]);  // Alpha unchanged.
756 
757   for (int i = 0; i < 1280; ++i) {
758     orig_pixels[i][0] = i;
759     orig_pixels[i][1] = i / 2;
760     orig_pixels[i][2] = i / 3;
761     orig_pixels[i][3] = i;
762   }
763   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
764     RGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 1280, 1);
765   }
766 }
767 
// Quantizes a 1280 pixel ramp in place with ARGBQuantize using an interval of
// 8 and an offset of 4.  The first three bytes of each pixel must equal the
// input rounded down to a multiple of 8 plus 4 (modulo 256); the fourth byte
// must be unchanged.  Then runs the benchmark loop on the same buffer.
TEST_F(LibYUVPlanarTest, TestARGBQuantize) {
  const int kWidth = 1280;
  const int kInterval = 8;
  const int kOffset = kInterval / 2;
  // Scale argument handed to ARGBQuantize: (65536 + 4) / 8.
  const int kScale = (65536 + kOffset) / kInterval;
  SIMD_ALIGNED(uint8_t orig_pixels[kWidth][4]);

  // Ramp: every pixel i is (i, i / 2, i / 3, i), truncated to a byte.
  for (int i = 0; i < kWidth; ++i) {
    const int ramp[4] = {i, i / 2, i / 3, i};
    for (int c = 0; c < 4; ++c) {
      orig_pixels[i][c] = ramp[c];
    }
  }
  ARGBQuantize(&orig_pixels[0][0], 0, kScale, kInterval, kOffset, 0, 0,
               kWidth, 1);

  for (int i = 0; i < kWidth; ++i) {
    EXPECT_EQ((i / kInterval * kInterval + kOffset) & 255, orig_pixels[i][0]);
    EXPECT_EQ((i / 2 / kInterval * kInterval + kOffset) & 255,
              orig_pixels[i][1]);
    EXPECT_EQ((i / 3 / kInterval * kInterval + kOffset) & 255,
              orig_pixels[i][2]);
    EXPECT_EQ(i & 255, orig_pixels[i][3]);
  }
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    ARGBQuantize(&orig_pixels[0][0], 0, kScale, kInterval, kOffset, 0, 0,
                 kWidth, 1);
  }
}
791 
// Runs ARGBMirror on a random benchmark-sized image (4 bytes per pixel) with
// disable_cpu_flags_ and with benchmark_cpu_info_; both outputs must be
// byte-identical.
TEST_F(LibYUVPlanarTest, ARGBMirror_Opt) {
  const int kStride = benchmark_width_ * 4;
  const int kBytes = benchmark_width_ * benchmark_height_ * 4;
  align_buffer_page_end(src_pixels, kBytes);
  align_buffer_page_end(dst_pixels_opt, kBytes);
  align_buffer_page_end(dst_pixels_c, kBytes);

  MemRandomize(src_pixels, kBytes);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBMirror(src_pixels, kStride, dst_pixels_c, kStride, benchmark_width_,
             benchmark_height_);

  // Benchmarked pass.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int run = 0; run < benchmark_iterations_; ++run) {
    ARGBMirror(src_pixels, kStride, dst_pixels_opt, kStride, benchmark_width_,
               benchmark_height_);
  }

  for (int n = 0; n < kBytes; ++n) {
    EXPECT_EQ(dst_pixels_c[n], dst_pixels_opt[n]);
  }
  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
815 
// Runs MirrorPlane on a random benchmark-sized plane (1 byte per pixel) with
// disable_cpu_flags_ and with benchmark_cpu_info_; both outputs must be
// byte-identical.
TEST_F(LibYUVPlanarTest, MirrorPlane_Opt) {
  const int kStride = benchmark_width_;
  const int kBytes = benchmark_width_ * benchmark_height_;
  align_buffer_page_end(src_pixels, kBytes);
  align_buffer_page_end(dst_pixels_opt, kBytes);
  align_buffer_page_end(dst_pixels_c, kBytes);

  MemRandomize(src_pixels, kBytes);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  MirrorPlane(src_pixels, kStride, dst_pixels_c, kStride, benchmark_width_,
              benchmark_height_);

  // Benchmarked pass.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int run = 0; run < benchmark_iterations_; ++run) {
    MirrorPlane(src_pixels, kStride, dst_pixels_opt, kStride,
                benchmark_width_, benchmark_height_);
  }

  for (int n = 0; n < kBytes; ++n) {
    EXPECT_EQ(dst_pixels_c[n], dst_pixels_opt[n]);
  }
  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
838 
// Mirrors a random two-bytes-per-pixel plane once under the
// disable_cpu_flags_ mask and benchmark_iterations_ times under the
// benchmark_cpu_info_ mask, then requires identical output.
TEST_F(LibYUVPlanarTest, MirrorUVPlane_Opt) {
  const int kStride = benchmark_width_ * 2;
  const int kSize = benchmark_width_ * benchmark_height_ * 2;
  align_buffer_page_end(src_pixels, kSize);
  align_buffer_page_end(dst_pixels_opt, kSize);
  align_buffer_page_end(dst_pixels_c, kSize);

  MemRandomize(src_pixels, kSize);

  // Reference output.
  MaskCpuFlags(disable_cpu_flags_);
  MirrorUVPlane(src_pixels, kStride, dst_pixels_c, kStride, benchmark_width_,
                benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  // Timed output.
  for (int run = 0; run < benchmark_iterations_; ++run) {
    MirrorUVPlane(src_pixels, kStride, dst_pixels_opt, kStride,
                  benchmark_width_, benchmark_height_);
  }

  for (int offset = 0; offset < kSize; ++offset) {
    EXPECT_EQ(dst_pixels_c[offset], dst_pixels_opt[offset]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
862 
// Checks ARGBShade against hand-computed results for three shade values, then
// runs it benchmark_pixels_div1280_ times over a 1280 pixel row.
TEST_F(LibYUVPlanarTest, TestShade) {
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  SIMD_ALIGNED(uint8_t shade_pixels[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  // Only pixels 0 and 1 carry non-zero data; pixels 2 and 3 (and the rest of
  // the row) stay at the zero written by the memset above.
  static const uint8_t kInput[2][4] = {
      {10u, 20u, 40u, 80u},
      {0u, 0u, 0u, 255u},
  };
  for (int pixel = 0; pixel < 2; ++pixel) {
    for (int channel = 0; channel < 4; ++channel) {
      orig_pixels[pixel][channel] = kInput[pixel][channel];
    }
  }

  // Do 8 pixels to allow opt version to be used.
  ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 8, 1, 0x80ffffff);
  static const unsigned kExpectedFirst[4][4] = {
      {10u, 20u, 40u, 40u},
      {0u, 0u, 0u, 128u},
      {0u, 0u, 0u, 0u},
      {0u, 0u, 0u, 0u},
  };
  for (int pixel = 0; pixel < 4; ++pixel) {
    for (int channel = 0; channel < 4; ++channel) {
      EXPECT_EQ(kExpectedFirst[pixel][channel], shade_pixels[pixel][channel]);
    }
  }

  ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 8, 1, 0x80808080);
  static const unsigned kExpectedSecond[4] = {5u, 10u, 20u, 40u};
  for (int channel = 0; channel < 4; ++channel) {
    EXPECT_EQ(kExpectedSecond[channel], shade_pixels[0][channel]);
  }

  ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 8, 1, 0x10204080);
  for (int channel = 0; channel < 4; ++channel) {
    EXPECT_EQ(5u, shade_pixels[0][channel]);
  }

  // Benchmark pass over the full 1280 pixel row.
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 1280, 1,
              0x80808080);
  }
}
920 
TEST_F(LibYUVPlanarTest,TestARGBInterpolate)921 TEST_F(LibYUVPlanarTest, TestARGBInterpolate) {
922   SIMD_ALIGNED(uint8_t orig_pixels_0[1280][4]);
923   SIMD_ALIGNED(uint8_t orig_pixels_1[1280][4]);
924   SIMD_ALIGNED(uint8_t interpolate_pixels[1280][4]);
925   memset(orig_pixels_0, 0, sizeof(orig_pixels_0));
926   memset(orig_pixels_1, 0, sizeof(orig_pixels_1));
927 
928   orig_pixels_0[0][0] = 16u;
929   orig_pixels_0[0][1] = 32u;
930   orig_pixels_0[0][2] = 64u;
931   orig_pixels_0[0][3] = 128u;
932   orig_pixels_0[1][0] = 0u;
933   orig_pixels_0[1][1] = 0u;
934   orig_pixels_0[1][2] = 0u;
935   orig_pixels_0[1][3] = 255u;
936   orig_pixels_0[2][0] = 0u;
937   orig_pixels_0[2][1] = 0u;
938   orig_pixels_0[2][2] = 0u;
939   orig_pixels_0[2][3] = 0u;
940   orig_pixels_0[3][0] = 0u;
941   orig_pixels_0[3][1] = 0u;
942   orig_pixels_0[3][2] = 0u;
943   orig_pixels_0[3][3] = 0u;
944 
945   orig_pixels_1[0][0] = 0u;
946   orig_pixels_1[0][1] = 0u;
947   orig_pixels_1[0][2] = 0u;
948   orig_pixels_1[0][3] = 0u;
949   orig_pixels_1[1][0] = 0u;
950   orig_pixels_1[1][1] = 0u;
951   orig_pixels_1[1][2] = 0u;
952   orig_pixels_1[1][3] = 0u;
953   orig_pixels_1[2][0] = 0u;
954   orig_pixels_1[2][1] = 0u;
955   orig_pixels_1[2][2] = 0u;
956   orig_pixels_1[2][3] = 0u;
957   orig_pixels_1[3][0] = 255u;
958   orig_pixels_1[3][1] = 255u;
959   orig_pixels_1[3][2] = 255u;
960   orig_pixels_1[3][3] = 255u;
961 
962   ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
963                   &interpolate_pixels[0][0], 0, 4, 1, 128);
964   EXPECT_EQ(8u, interpolate_pixels[0][0]);
965   EXPECT_EQ(16u, interpolate_pixels[0][1]);
966   EXPECT_EQ(32u, interpolate_pixels[0][2]);
967   EXPECT_EQ(64u, interpolate_pixels[0][3]);
968   EXPECT_EQ(0u, interpolate_pixels[1][0]);
969   EXPECT_EQ(0u, interpolate_pixels[1][1]);
970   EXPECT_EQ(0u, interpolate_pixels[1][2]);
971   EXPECT_EQ(128u, interpolate_pixels[1][3]);
972   EXPECT_EQ(0u, interpolate_pixels[2][0]);
973   EXPECT_EQ(0u, interpolate_pixels[2][1]);
974   EXPECT_EQ(0u, interpolate_pixels[2][2]);
975   EXPECT_EQ(0u, interpolate_pixels[2][3]);
976   EXPECT_EQ(128u, interpolate_pixels[3][0]);
977   EXPECT_EQ(128u, interpolate_pixels[3][1]);
978   EXPECT_EQ(128u, interpolate_pixels[3][2]);
979   EXPECT_EQ(128u, interpolate_pixels[3][3]);
980 
981   ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
982                   &interpolate_pixels[0][0], 0, 4, 1, 0);
983   EXPECT_EQ(16u, interpolate_pixels[0][0]);
984   EXPECT_EQ(32u, interpolate_pixels[0][1]);
985   EXPECT_EQ(64u, interpolate_pixels[0][2]);
986   EXPECT_EQ(128u, interpolate_pixels[0][3]);
987 
988   ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
989                   &interpolate_pixels[0][0], 0, 4, 1, 192);
990 
991   EXPECT_EQ(4u, interpolate_pixels[0][0]);
992   EXPECT_EQ(8u, interpolate_pixels[0][1]);
993   EXPECT_EQ(16u, interpolate_pixels[0][2]);
994   EXPECT_EQ(32u, interpolate_pixels[0][3]);
995 
996   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
997     ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
998                     &interpolate_pixels[0][0], 0, 1280, 1, 128);
999   }
1000 }
1001 
TEST_F(LibYUVPlanarTest,TestInterpolatePlane)1002 TEST_F(LibYUVPlanarTest, TestInterpolatePlane) {
1003   SIMD_ALIGNED(uint8_t orig_pixels_0[1280]);
1004   SIMD_ALIGNED(uint8_t orig_pixels_1[1280]);
1005   SIMD_ALIGNED(uint8_t interpolate_pixels[1280]);
1006   memset(orig_pixels_0, 0, sizeof(orig_pixels_0));
1007   memset(orig_pixels_1, 0, sizeof(orig_pixels_1));
1008 
1009   orig_pixels_0[0] = 16u;
1010   orig_pixels_0[1] = 32u;
1011   orig_pixels_0[2] = 64u;
1012   orig_pixels_0[3] = 128u;
1013   orig_pixels_0[4] = 0u;
1014   orig_pixels_0[5] = 0u;
1015   orig_pixels_0[6] = 0u;
1016   orig_pixels_0[7] = 255u;
1017   orig_pixels_0[8] = 0u;
1018   orig_pixels_0[9] = 0u;
1019   orig_pixels_0[10] = 0u;
1020   orig_pixels_0[11] = 0u;
1021   orig_pixels_0[12] = 0u;
1022   orig_pixels_0[13] = 0u;
1023   orig_pixels_0[14] = 0u;
1024   orig_pixels_0[15] = 0u;
1025 
1026   orig_pixels_1[0] = 0u;
1027   orig_pixels_1[1] = 0u;
1028   orig_pixels_1[2] = 0u;
1029   orig_pixels_1[3] = 0u;
1030   orig_pixels_1[4] = 0u;
1031   orig_pixels_1[5] = 0u;
1032   orig_pixels_1[6] = 0u;
1033   orig_pixels_1[7] = 0u;
1034   orig_pixels_1[8] = 0u;
1035   orig_pixels_1[9] = 0u;
1036   orig_pixels_1[10] = 0u;
1037   orig_pixels_1[11] = 0u;
1038   orig_pixels_1[12] = 255u;
1039   orig_pixels_1[13] = 255u;
1040   orig_pixels_1[14] = 255u;
1041   orig_pixels_1[15] = 255u;
1042 
1043   InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
1044                    &interpolate_pixels[0], 0, 16, 1, 128);
1045   EXPECT_EQ(8u, interpolate_pixels[0]);
1046   EXPECT_EQ(16u, interpolate_pixels[1]);
1047   EXPECT_EQ(32u, interpolate_pixels[2]);
1048   EXPECT_EQ(64u, interpolate_pixels[3]);
1049   EXPECT_EQ(0u, interpolate_pixels[4]);
1050   EXPECT_EQ(0u, interpolate_pixels[5]);
1051   EXPECT_EQ(0u, interpolate_pixels[6]);
1052   EXPECT_EQ(128u, interpolate_pixels[7]);
1053   EXPECT_EQ(0u, interpolate_pixels[8]);
1054   EXPECT_EQ(0u, interpolate_pixels[9]);
1055   EXPECT_EQ(0u, interpolate_pixels[10]);
1056   EXPECT_EQ(0u, interpolate_pixels[11]);
1057   EXPECT_EQ(128u, interpolate_pixels[12]);
1058   EXPECT_EQ(128u, interpolate_pixels[13]);
1059   EXPECT_EQ(128u, interpolate_pixels[14]);
1060   EXPECT_EQ(128u, interpolate_pixels[15]);
1061 
1062   InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
1063                    &interpolate_pixels[0], 0, 16, 1, 0);
1064   EXPECT_EQ(16u, interpolate_pixels[0]);
1065   EXPECT_EQ(32u, interpolate_pixels[1]);
1066   EXPECT_EQ(64u, interpolate_pixels[2]);
1067   EXPECT_EQ(128u, interpolate_pixels[3]);
1068 
1069   InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
1070                    &interpolate_pixels[0], 0, 16, 1, 192);
1071 
1072   EXPECT_EQ(4u, interpolate_pixels[0]);
1073   EXPECT_EQ(8u, interpolate_pixels[1]);
1074   EXPECT_EQ(16u, interpolate_pixels[2]);
1075   EXPECT_EQ(32u, interpolate_pixels[3]);
1076 
1077   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
1078     InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
1079                      &interpolate_pixels[0], 0, 1280, 1, 123);
1080   }
1081 }
1082 
TEST_F(LibYUVPlanarTest,TestInterpolatePlane_16)1083 TEST_F(LibYUVPlanarTest, TestInterpolatePlane_16) {
1084   SIMD_ALIGNED(uint16_t orig_pixels_0[1280]);
1085   SIMD_ALIGNED(uint16_t orig_pixels_1[1280]);
1086   SIMD_ALIGNED(uint16_t interpolate_pixels[1280]);
1087   memset(orig_pixels_0, 0, sizeof(orig_pixels_0));
1088   memset(orig_pixels_1, 0, sizeof(orig_pixels_1));
1089 
1090   orig_pixels_0[0] = 16u;
1091   orig_pixels_0[1] = 32u;
1092   orig_pixels_0[2] = 64u;
1093   orig_pixels_0[3] = 128u;
1094   orig_pixels_0[4] = 0u;
1095   orig_pixels_0[5] = 0u;
1096   orig_pixels_0[6] = 0u;
1097   orig_pixels_0[7] = 255u;
1098   orig_pixels_0[8] = 0u;
1099   orig_pixels_0[9] = 0u;
1100   orig_pixels_0[10] = 0u;
1101   orig_pixels_0[11] = 0u;
1102   orig_pixels_0[12] = 0u;
1103   orig_pixels_0[13] = 0u;
1104   orig_pixels_0[14] = 0u;
1105   orig_pixels_0[15] = 0u;
1106 
1107   orig_pixels_1[0] = 0u;
1108   orig_pixels_1[1] = 0u;
1109   orig_pixels_1[2] = 0u;
1110   orig_pixels_1[3] = 0u;
1111   orig_pixels_1[4] = 0u;
1112   orig_pixels_1[5] = 0u;
1113   orig_pixels_1[6] = 0u;
1114   orig_pixels_1[7] = 0u;
1115   orig_pixels_1[8] = 0u;
1116   orig_pixels_1[9] = 0u;
1117   orig_pixels_1[10] = 0u;
1118   orig_pixels_1[11] = 0u;
1119   orig_pixels_1[12] = 255u;
1120   orig_pixels_1[13] = 255u;
1121   orig_pixels_1[14] = 255u;
1122   orig_pixels_1[15] = 255u;
1123 
1124   InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
1125                       &interpolate_pixels[0], 0, 16, 1, 128);
1126   EXPECT_EQ(8u, interpolate_pixels[0]);
1127   EXPECT_EQ(16u, interpolate_pixels[1]);
1128   EXPECT_EQ(32u, interpolate_pixels[2]);
1129   EXPECT_EQ(64u, interpolate_pixels[3]);
1130   EXPECT_EQ(0u, interpolate_pixels[4]);
1131   EXPECT_EQ(0u, interpolate_pixels[5]);
1132   EXPECT_EQ(0u, interpolate_pixels[6]);
1133   EXPECT_EQ(128u, interpolate_pixels[7]);
1134   EXPECT_EQ(0u, interpolate_pixels[8]);
1135   EXPECT_EQ(0u, interpolate_pixels[9]);
1136   EXPECT_EQ(0u, interpolate_pixels[10]);
1137   EXPECT_EQ(0u, interpolate_pixels[11]);
1138   EXPECT_EQ(128u, interpolate_pixels[12]);
1139   EXPECT_EQ(128u, interpolate_pixels[13]);
1140   EXPECT_EQ(128u, interpolate_pixels[14]);
1141   EXPECT_EQ(128u, interpolate_pixels[15]);
1142 
1143   InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
1144                       &interpolate_pixels[0], 0, 16, 1, 0);
1145   EXPECT_EQ(16u, interpolate_pixels[0]);
1146   EXPECT_EQ(32u, interpolate_pixels[1]);
1147   EXPECT_EQ(64u, interpolate_pixels[2]);
1148   EXPECT_EQ(128u, interpolate_pixels[3]);
1149 
1150   InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
1151                       &interpolate_pixels[0], 0, 16, 1, 192);
1152 
1153   EXPECT_EQ(4u, interpolate_pixels[0]);
1154   EXPECT_EQ(8u, interpolate_pixels[1]);
1155   EXPECT_EQ(16u, interpolate_pixels[2]);
1156   EXPECT_EQ(32u, interpolate_pixels[3]);
1157 
1158   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
1159     InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
1160                         &interpolate_pixels[0], 0, 1280, 1, 123);
1161   }
1162 }
1163 
// Generates a test named ARGBInterpolate<TERP><N>.  The test fills two source
// buffers with fastrand() bytes, runs ARGBInterpolate once under the
// disable_cpu_flags_ mask and benchmark_iterations_ times under the
// benchmark_cpu_info_ mask, and requires both outputs to match exactly.
//   BPP_x / STRIDE_x  bytes per pixel and stride rounding for source / dest.
//   W1280             width expression.
//   TERP              interpolation value passed to ARGBInterpolate.
//   NEG               sign token (+ or -) placed in front of the height.
//   OFF               byte offset applied to both source pointers.
// FMT_A and FMT_B are accepted but not referenced by the expansion.
#define TESTTERP(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, W1280, TERP, \
                 N, NEG, OFF)                                                 \
  TEST_F(LibYUVPlanarTest, ARGBInterpolate##TERP##N) {                        \
    const int kWidth = W1280;                                                 \
    const int kHeight = benchmark_height_;                                    \
    const int kStrideA =                                                      \
        (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A;                \
    const int kStrideB =                                                      \
        (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B;                \
    const int kSrcSize = kStrideA * kHeight;                                  \
    const int kDstSize = kStrideB * kHeight;                                  \
    align_buffer_page_end(src_argb_a, kSrcSize + OFF);                        \
    align_buffer_page_end(src_argb_b, kSrcSize + OFF);                        \
    align_buffer_page_end(dst_argb_c, kDstSize);                              \
    align_buffer_page_end(dst_argb_opt, kDstSize);                            \
    for (int offset = 0; offset < kSrcSize; ++offset) {                       \
      src_argb_a[offset + OFF] = (fastrand() & 0xff);                         \
      src_argb_b[offset + OFF] = (fastrand() & 0xff);                         \
    }                                                                         \
    MaskCpuFlags(disable_cpu_flags_);                                         \
    ARGBInterpolate(src_argb_a + OFF, kStrideA, src_argb_b + OFF, kStrideA,   \
                    dst_argb_c, kStrideB, kWidth, NEG kHeight, TERP);         \
    MaskCpuFlags(benchmark_cpu_info_);                                        \
    for (int run = 0; run < benchmark_iterations_; ++run) {                   \
      ARGBInterpolate(src_argb_a + OFF, kStrideA, src_argb_b + OFF, kStrideA, \
                      dst_argb_opt, kStrideB, kWidth, NEG kHeight, TERP);     \
    }                                                                         \
    for (int offset = 0; offset < kDstSize; ++offset) {                       \
      EXPECT_EQ(dst_argb_c[offset], dst_argb_opt[offset]);                    \
    }                                                                         \
    free_aligned_buffer_page_end(src_argb_a);                                 \
    free_aligned_buffer_page_end(src_argb_b);                                 \
    free_aligned_buffer_page_end(dst_argb_c);                                 \
    free_aligned_buffer_page_end(dst_argb_opt);                               \
  }
1197 
1198 #define TESTINTERPOLATE(TERP)                                                \
1199   TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_ + 1, TERP, _Any, +, 0)   \
1200   TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Unaligned, +, 1) \
1201   TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Invert, -, 0)    \
1202   TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Opt, +, 0)
1203 
1204 TESTINTERPOLATE(0)
1205 TESTINTERPOLATE(64)
1206 TESTINTERPOLATE(128)
1207 TESTINTERPOLATE(192)
1208 TESTINTERPOLATE(255)
1209 
TestBlend(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off,int attenuate)1210 static int TestBlend(int width,
1211                      int height,
1212                      int benchmark_iterations,
1213                      int disable_cpu_flags,
1214                      int benchmark_cpu_info,
1215                      int invert,
1216                      int off,
1217                      int attenuate) {
1218   if (width < 1) {
1219     width = 1;
1220   }
1221   const int kBpp = 4;
1222   const int kStride = width * kBpp;
1223   align_buffer_page_end(src_argb_a, kStride * height + off);
1224   align_buffer_page_end(src_argb_b, kStride * height + off);
1225   align_buffer_page_end(dst_argb_c, kStride * height);
1226   align_buffer_page_end(dst_argb_opt, kStride * height);
1227   for (int i = 0; i < kStride * height; ++i) {
1228     src_argb_a[i + off] = (fastrand() & 0xff);
1229     src_argb_b[i + off] = (fastrand() & 0xff);
1230   }
1231   MemRandomize(src_argb_a, kStride * height + off);
1232   MemRandomize(src_argb_b, kStride * height + off);
1233   if (attenuate) {
1234     ARGBAttenuate(src_argb_a + off, kStride, src_argb_a + off, kStride, width,
1235                   height);
1236   }
1237   memset(dst_argb_c, 255, kStride * height);
1238   memset(dst_argb_opt, 255, kStride * height);
1239 
1240   MaskCpuFlags(disable_cpu_flags);
1241   ARGBBlend(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
1242             kStride, width, invert * height);
1243   MaskCpuFlags(benchmark_cpu_info);
1244   for (int i = 0; i < benchmark_iterations; ++i) {
1245     ARGBBlend(src_argb_a + off, kStride, src_argb_b + off, kStride,
1246               dst_argb_opt, kStride, width, invert * height);
1247   }
1248   int max_diff = 0;
1249   for (int i = 0; i < kStride * height; ++i) {
1250     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1251                        static_cast<int>(dst_argb_opt[i]));
1252     if (abs_diff > max_diff) {
1253       max_diff = abs_diff;
1254     }
1255   }
1256   free_aligned_buffer_page_end(src_argb_a);
1257   free_aligned_buffer_page_end(src_argb_b);
1258   free_aligned_buffer_page_end(dst_argb_c);
1259   free_aligned_buffer_page_end(dst_argb_opt);
1260   return max_diff;
1261 }
1262 
// ARGBBlend with width benchmark_width_ + 1, no source offset, attenuated
// source; the two outputs may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBBlend_Any) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int kAttenuate = 1;
  const int max_diff = TestBlend(benchmark_width_ + 1, benchmark_height_,
                                 benchmark_iterations_, disable_cpu_flags_,
                                 benchmark_cpu_info_, kInvert, kOffset,
                                 kAttenuate);
  EXPECT_LE(max_diff, 1);
}
1269 
// ARGBBlend with the source pointers offset by one byte, attenuated source;
// the two outputs may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBBlend_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int kAttenuate = 1;
  const int max_diff = TestBlend(benchmark_width_, benchmark_height_,
                                 benchmark_iterations_, disable_cpu_flags_,
                                 benchmark_cpu_info_, kInvert, kOffset,
                                 kAttenuate);
  EXPECT_LE(max_diff, 1);
}
1276 
// ARGBBlend with a negated height, no source offset, attenuated source; the
// two outputs may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBBlend_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int kAttenuate = 1;
  const int max_diff = TestBlend(benchmark_width_, benchmark_height_,
                                 benchmark_iterations_, disable_cpu_flags_,
                                 benchmark_cpu_info_, kInvert, kOffset,
                                 kAttenuate);
  EXPECT_LE(max_diff, 1);
}
1283 
// ARGBBlend on a source that is NOT passed through ARGBAttenuate first; the
// two outputs may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBBlend_Unattenuated) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int kAttenuate = 0;
  const int max_diff = TestBlend(benchmark_width_, benchmark_height_,
                                 benchmark_iterations_, disable_cpu_flags_,
                                 benchmark_cpu_info_, kInvert, kOffset,
                                 kAttenuate);
  EXPECT_LE(max_diff, 1);
}
1290 
// ARGBBlend at benchmark size, no source offset, attenuated source; the two
// outputs may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBBlend_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int kAttenuate = 1;
  const int max_diff = TestBlend(benchmark_width_, benchmark_height_,
                                 benchmark_iterations_, disable_cpu_flags_,
                                 benchmark_cpu_info_, kInvert, kOffset,
                                 kAttenuate);
  EXPECT_LE(max_diff, 1);
}
1297 
TestBlendPlane(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1298 static void TestBlendPlane(int width,
1299                            int height,
1300                            int benchmark_iterations,
1301                            int disable_cpu_flags,
1302                            int benchmark_cpu_info,
1303                            int invert,
1304                            int off) {
1305   if (width < 1) {
1306     width = 1;
1307   }
1308   const int kBpp = 1;
1309   const int kStride = width * kBpp;
1310   align_buffer_page_end(src_argb_a, kStride * height + off);
1311   align_buffer_page_end(src_argb_b, kStride * height + off);
1312   align_buffer_page_end(src_argb_alpha, kStride * height + off);
1313   align_buffer_page_end(dst_argb_c, kStride * height + off);
1314   align_buffer_page_end(dst_argb_opt, kStride * height + off);
1315   memset(dst_argb_c, 255, kStride * height + off);
1316   memset(dst_argb_opt, 255, kStride * height + off);
1317 
1318   // Test source is maintained exactly if alpha is 255.
1319   for (int i = 0; i < width; ++i) {
1320     src_argb_a[i + off] = i & 255;
1321     src_argb_b[i + off] = 255 - (i & 255);
1322   }
1323   memset(src_argb_alpha + off, 255, width);
1324   BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
1325              src_argb_alpha + off, width, dst_argb_opt + off, width, width, 1);
1326   for (int i = 0; i < width; ++i) {
1327     EXPECT_EQ(src_argb_a[i + off], dst_argb_opt[i + off]);
1328   }
1329   // Test destination is maintained exactly if alpha is 0.
1330   memset(src_argb_alpha + off, 0, width);
1331   BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
1332              src_argb_alpha + off, width, dst_argb_opt + off, width, width, 1);
1333   for (int i = 0; i < width; ++i) {
1334     EXPECT_EQ(src_argb_b[i + off], dst_argb_opt[i + off]);
1335   }
1336   for (int i = 0; i < kStride * height; ++i) {
1337     src_argb_a[i + off] = (fastrand() & 0xff);
1338     src_argb_b[i + off] = (fastrand() & 0xff);
1339     src_argb_alpha[i + off] = (fastrand() & 0xff);
1340   }
1341 
1342   MaskCpuFlags(disable_cpu_flags);
1343   BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
1344              src_argb_alpha + off, width, dst_argb_c + off, width, width,
1345              invert * height);
1346   MaskCpuFlags(benchmark_cpu_info);
1347   for (int i = 0; i < benchmark_iterations; ++i) {
1348     BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
1349                src_argb_alpha + off, width, dst_argb_opt + off, width, width,
1350                invert * height);
1351   }
1352   for (int i = 0; i < kStride * height; ++i) {
1353     EXPECT_EQ(dst_argb_c[i + off], dst_argb_opt[i + off]);
1354   }
1355   free_aligned_buffer_page_end(src_argb_a);
1356   free_aligned_buffer_page_end(src_argb_b);
1357   free_aligned_buffer_page_end(src_argb_alpha);
1358   free_aligned_buffer_page_end(dst_argb_c);
1359   free_aligned_buffer_page_end(dst_argb_opt);
1360 }
1361 
// BlendPlane at benchmark size, positive height, no pointer offset.
TEST_F(LibYUVPlanarTest, BlendPlane_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
// BlendPlane at benchmark size, positive height, pointers offset by one byte.
TEST_F(LibYUVPlanarTest, BlendPlane_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
// BlendPlane with width benchmark_width_ + 1, positive height, pointers
// offset by one byte.
TEST_F(LibYUVPlanarTest, BlendPlane_Any) {
  const int kInvert = +1;
  const int kOffset = 1;
  TestBlendPlane(benchmark_width_ + 1, benchmark_height_,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_, kInvert, kOffset);
}
// BlendPlane at benchmark size with a negated height, pointers offset by one
// byte.
TEST_F(LibYUVPlanarTest, BlendPlane_Invert) {
  const int kInvert = -1;
  const int kOffset = 1;
  TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
1378 
// Integer division of v by a, rounded up (for non-negative v and positive a).
#define SUBSAMPLE(v, a) (((v) + (a) - 1) / (a))
1380 
TestI420Blend(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1381 static void TestI420Blend(int width,
1382                           int height,
1383                           int benchmark_iterations,
1384                           int disable_cpu_flags,
1385                           int benchmark_cpu_info,
1386                           int invert,
1387                           int off) {
1388   width = ((width) > 0) ? (width) : 1;
1389   const int kStrideUV = SUBSAMPLE(width, 2);
1390   const int kSizeUV = kStrideUV * SUBSAMPLE(height, 2);
1391   align_buffer_page_end(src_y0, width * height + off);
1392   align_buffer_page_end(src_u0, kSizeUV + off);
1393   align_buffer_page_end(src_v0, kSizeUV + off);
1394   align_buffer_page_end(src_y1, width * height + off);
1395   align_buffer_page_end(src_u1, kSizeUV + off);
1396   align_buffer_page_end(src_v1, kSizeUV + off);
1397   align_buffer_page_end(src_a, width * height + off);
1398   align_buffer_page_end(dst_y_c, width * height + off);
1399   align_buffer_page_end(dst_u_c, kSizeUV + off);
1400   align_buffer_page_end(dst_v_c, kSizeUV + off);
1401   align_buffer_page_end(dst_y_opt, width * height + off);
1402   align_buffer_page_end(dst_u_opt, kSizeUV + off);
1403   align_buffer_page_end(dst_v_opt, kSizeUV + off);
1404 
1405   MemRandomize(src_y0, width * height + off);
1406   MemRandomize(src_u0, kSizeUV + off);
1407   MemRandomize(src_v0, kSizeUV + off);
1408   MemRandomize(src_y1, width * height + off);
1409   MemRandomize(src_u1, kSizeUV + off);
1410   MemRandomize(src_v1, kSizeUV + off);
1411   MemRandomize(src_a, width * height + off);
1412   memset(dst_y_c, 255, width * height + off);
1413   memset(dst_u_c, 255, kSizeUV + off);
1414   memset(dst_v_c, 255, kSizeUV + off);
1415   memset(dst_y_opt, 255, width * height + off);
1416   memset(dst_u_opt, 255, kSizeUV + off);
1417   memset(dst_v_opt, 255, kSizeUV + off);
1418 
1419   MaskCpuFlags(disable_cpu_flags);
1420   I420Blend(src_y0 + off, width, src_u0 + off, kStrideUV, src_v0 + off,
1421             kStrideUV, src_y1 + off, width, src_u1 + off, kStrideUV,
1422             src_v1 + off, kStrideUV, src_a + off, width, dst_y_c + off, width,
1423             dst_u_c + off, kStrideUV, dst_v_c + off, kStrideUV, width,
1424             invert * height);
1425   MaskCpuFlags(benchmark_cpu_info);
1426   for (int i = 0; i < benchmark_iterations; ++i) {
1427     I420Blend(src_y0 + off, width, src_u0 + off, kStrideUV, src_v0 + off,
1428               kStrideUV, src_y1 + off, width, src_u1 + off, kStrideUV,
1429               src_v1 + off, kStrideUV, src_a + off, width, dst_y_opt + off,
1430               width, dst_u_opt + off, kStrideUV, dst_v_opt + off, kStrideUV,
1431               width, invert * height);
1432   }
1433   for (int i = 0; i < width * height; ++i) {
1434     EXPECT_EQ(dst_y_c[i + off], dst_y_opt[i + off]);
1435   }
1436   for (int i = 0; i < kSizeUV; ++i) {
1437     EXPECT_EQ(dst_u_c[i + off], dst_u_opt[i + off]);
1438     EXPECT_EQ(dst_v_c[i + off], dst_v_opt[i + off]);
1439   }
1440   free_aligned_buffer_page_end(src_y0);
1441   free_aligned_buffer_page_end(src_u0);
1442   free_aligned_buffer_page_end(src_v0);
1443   free_aligned_buffer_page_end(src_y1);
1444   free_aligned_buffer_page_end(src_u1);
1445   free_aligned_buffer_page_end(src_v1);
1446   free_aligned_buffer_page_end(src_a);
1447   free_aligned_buffer_page_end(dst_y_c);
1448   free_aligned_buffer_page_end(dst_u_c);
1449   free_aligned_buffer_page_end(dst_v_c);
1450   free_aligned_buffer_page_end(dst_y_opt);
1451   free_aligned_buffer_page_end(dst_u_opt);
1452   free_aligned_buffer_page_end(dst_v_opt);
1453 }
1454 
// I420Blend at benchmark size, positive height, no pointer offset.
TEST_F(LibYUVPlanarTest, I420Blend_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
// I420Blend at benchmark size, positive height, pointers offset by one byte.
TEST_F(LibYUVPlanarTest, I420Blend_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
1463 
1464 // TODO(fbarchard): DISABLED because _Any uses C.  Avoid C and re-enable.
TEST_F(LibYUVPlanarTest,DISABLED_I420Blend_Any)1465 TEST_F(LibYUVPlanarTest, DISABLED_I420Blend_Any) {
1466   TestI420Blend(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_,
1467                 disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
1468 }
// I420Blend at benchmark size with a negated height, no pointer offset.
TEST_F(LibYUVPlanarTest, I420Blend_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
1473 
// Checks ARGBAffineRow_C against three hand-computed samples and, when the
// SSE2 row function is compiled in AND TestCpuFlag reports SSE2, checks that
// ARGBAffineRow_SSE2 produces identical output and benchmarks it.
TEST_F(LibYUVPlanarTest, TestAffine) {
  SIMD_ALIGNED(uint8_t orig_pixels_0[1280][4]);
  SIMD_ALIGNED(uint8_t interpolate_pixels_C[1280][4]);

  // Every channel of pixel i holds the low 8 bits of i.
  for (int i = 0; i < 1280; ++i) {
    for (int j = 0; j < 4; ++j) {
      orig_pixels_0[i][j] = static_cast<uint8_t>(i);
    }
  }

  // NOTE(review): presumably {u, v, du, dv}, i.e. start at (0, 0) and advance
  // 0.75 source pixels per destination pixel - confirm against the row
  // function.  The expectations below match that reading (128 * 0.75 == 96).
  float uv_step[4] = {0.f, 0.f, 0.75f, 0.f};

  ARGBAffineRow_C(&orig_pixels_0[0][0], 0, &interpolate_pixels_C[0][0], uv_step,
                  1280);
  EXPECT_EQ(0u, interpolate_pixels_C[0][0]);
  EXPECT_EQ(96u, interpolate_pixels_C[128][0]);
  EXPECT_EQ(191u, interpolate_pixels_C[255][3]);

#if defined(HAS_ARGBAFFINEROW_SSE2)
  // The SSE2 row function must not be called unless the CPU flag check allows
  // it, so the correctness comparison is guarded as well as the benchmark.
  if (TestCpuFlag(kCpuHasSSE2)) {
    SIMD_ALIGNED(uint8_t interpolate_pixels_Opt[1280][4]);
    ARGBAffineRow_SSE2(&orig_pixels_0[0][0], 0, &interpolate_pixels_Opt[0][0],
                       uv_step, 1280);
    EXPECT_EQ(0,
              memcmp(interpolate_pixels_Opt, interpolate_pixels_C, 1280 * 4));

    for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
      ARGBAffineRow_SSE2(&orig_pixels_0[0][0], 0, &interpolate_pixels_Opt[0][0],
                         uv_step, 1280);
    }
  }
#endif
}
1507 
// Copies a bordered random plane with CopyPlane under the disable_cpu_flags_
// mask and again under the benchmark_cpu_info_ mask (benchmark_iterations_
// times each), then requires the two destination buffers, borders included,
// to be identical.
TEST_F(LibYUVPlanarTest, TestCopyPlane) {
  const int kBorder = 12;
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kSrcStride = kWidth + kBorder * 2;
  const int kPlaneSize = (kWidth + kBorder * 2) * (kHeight + kBorder * 2);
  // Offset of the first pixel inside the border.
  const int kOffset = kBorder * (kWidth + kBorder * 2) + kBorder;
  // NOTE(review): the destination stride is a fixed 8 bytes rather than the
  // source stride, so destination rows overlap when the width exceeds 8 -
  // confirm this is intended.
  const int kDstStride = 8;

  align_buffer_page_end(orig_y, kPlaneSize);
  align_buffer_page_end(dst_c, kPlaneSize);
  align_buffer_page_end(dst_opt, kPlaneSize);

  memset(orig_y, 0, kPlaneSize);
  memset(dst_c, 0, kPlaneSize);
  memset(dst_opt, 0, kPlaneSize);

  // Fill image buffers with random data.
  for (int row = 0; row < kHeight; ++row) {
    for (int col = 0; col < kWidth; ++col) {
      orig_y[kOffset + row * kSrcStride + col] = fastrand() & 0xff;
    }
  }

  // Fill destination buffers with random data.
  for (int index = 0; index < kPlaneSize; ++index) {
    const uint8_t random_number = fastrand() & 0x7f;
    dst_c[index] = random_number;
    dst_opt[index] = dst_c[index];
  }

  // Disable all optimizations.
  MaskCpuFlags(disable_cpu_flags_);
  for (int run = 0; run < benchmark_iterations_; ++run) {
    CopyPlane(orig_y + kOffset, kSrcStride, dst_c + kOffset, kDstStride, kWidth,
              kHeight);
  }

  // Enable optimizations.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int run = 0; run < benchmark_iterations_; ++run) {
    CopyPlane(orig_y + kOffset, kSrcStride, dst_opt + kOffset, kDstStride,
              kWidth, kHeight);
  }

  // Count mismatching bytes across the whole buffer.
  int err = 0;
  for (int index = 0; index < kPlaneSize; ++index) {
    if (dst_c[index] != dst_opt[index]) {
      ++err;
    }
  }

  free_aligned_buffer_page_end(orig_y);
  free_aligned_buffer_page_end(dst_c);
  free_aligned_buffer_page_end(dst_opt);

  EXPECT_EQ(0, err);
}
1567 
// Copies a tightly packed random plane (stride == width) with CopyPlane,
// once with CPU features masked by disable_cpu_flags_ and once with
// benchmark_cpu_info_, and expects identical output from both passes.
TEST_F(LibYUVPlanarTest, CopyPlane_Opt) {
  int i;
  const int width = benchmark_width_;
  const int height = benchmark_height_;
  const int plane_bytes = width * height;
  align_buffer_page_end(orig_y, plane_bytes);
  align_buffer_page_end(dst_c, plane_bytes);
  align_buffer_page_end(dst_opt, plane_bytes);

  MemRandomize(orig_y, plane_bytes);
  // Different fill values, so a pass that writes nothing is detected.
  memset(dst_c, 1, plane_bytes);
  memset(dst_opt, 2, plane_bytes);

  // Disable all optimizations.
  MaskCpuFlags(disable_cpu_flags_);
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    CopyPlane(orig_y, width, dst_c, width, width, height);
  }

  // Enable optimizations.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    CopyPlane(orig_y, width, dst_opt, width, width, height);
  }

  for (i = 0; i < plane_bytes; ++i) {
    EXPECT_EQ(dst_c[i], dst_opt[i]);
  }

  free_aligned_buffer_page_end(orig_y);
  free_aligned_buffer_page_end(dst_c);
  free_aligned_buffer_page_end(dst_opt);
}
1601 
// Verifies that CopyPlane leaves both source and destination untouched when
// the rectangle has zero width and/or zero height. The same three calls are
// made with CPU features masked by disable_cpu_flags_ and then with
// benchmark_cpu_info_.
TEST_F(LibYUVPlanarTest, TestCopyPlaneZero) {
  uint8_t src = 42;
  uint8_t dst = 0;

  // Each row is {stride (used for both src and dst), width, height}.
  static const int kZeroRects[3][3] = {
      {0, 0, 0},
      {1, 1, 0},
      {1, 0, 1},
  };
  // First entry disables all optimizations, second enables them.
  const int cpu_masks[2] = {disable_cpu_flags_, benchmark_cpu_info_};

  for (int m = 0; m < 2; ++m) {
    MaskCpuFlags(cpu_masks[m]);
    for (int r = 0; r < 3; ++r) {
      const int stride = kZeroRects[r][0];
      CopyPlane(&src, stride, &dst, stride, kZeroRects[r][1],
                kZeroRects[r][2]);
      EXPECT_EQ(src, 42);
      EXPECT_EQ(dst, 0);
    }
  }
}
1636 
// Runs DetilePlane on a random tiled source with CPU features masked by
// disable_cpu_flags_ and again with benchmark_cpu_info_, and expects the two
// detiled planes to be identical.
TEST_F(LibYUVPlanarTest, TestDetilePlane) {
  int i;
  const int width = benchmark_width_;
  const int height = benchmark_height_;

  // orig is tiled, so its dimensions are rounded up to a multiple of 16.
  const int tiled_width = (width + 15) & ~15;
  const int tiled_height = (height + 15) & ~15;
  const int tiled_bytes = tiled_width * tiled_height;
  const int detiled_bytes = width * height;
  align_buffer_page_end(orig_y, tiled_bytes);
  align_buffer_page_end(dst_c, detiled_bytes);
  align_buffer_page_end(dst_opt, detiled_bytes);

  MemRandomize(orig_y, tiled_bytes);
  memset(dst_c, 0, detiled_bytes);
  memset(dst_opt, 0, detiled_bytes);

  // Disable all optimizations.
  MaskCpuFlags(disable_cpu_flags_);
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    DetilePlane(orig_y, tiled_width, dst_c, width, width, height, 16);
  }

  // Enable optimizations.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    DetilePlane(orig_y, tiled_width, dst_opt, width, width, height, 16);
  }

  for (i = 0; i < detiled_bytes; ++i) {
    EXPECT_EQ(dst_c[i], dst_opt[i]);
  }

  free_aligned_buffer_page_end(orig_y);
  free_aligned_buffer_page_end(dst_c);
  free_aligned_buffer_page_end(dst_opt);
}
1675 
1676 // Compares DetileSplitUV to 2 step Detile + SplitUV
TEST_F(LibYUVPlanarTest,TestDetileSplitUVPlane_Correctness)1677 TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Correctness) {
1678   int i, j;
1679 
1680   // orig is tiled.  Allocate enough memory for tiles.
1681   int orig_width = (benchmark_width_ + 15) & ~15;
1682   int orig_height = (benchmark_height_ + 15) & ~15;
1683   int orig_plane_size = orig_width * orig_height;
1684   int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_;
1685   align_buffer_page_end(orig_uv, orig_plane_size);
1686   align_buffer_page_end(detiled_uv, orig_plane_size);
1687   align_buffer_page_end(dst_u_two_stage, uv_plane_size);
1688   align_buffer_page_end(dst_u_opt, uv_plane_size);
1689   align_buffer_page_end(dst_v_two_stage, uv_plane_size);
1690   align_buffer_page_end(dst_v_opt, uv_plane_size);
1691 
1692   MemRandomize(orig_uv, orig_plane_size);
1693   memset(detiled_uv, 0, orig_plane_size);
1694   memset(dst_u_two_stage, 0, uv_plane_size);
1695   memset(dst_u_opt, 0, uv_plane_size);
1696   memset(dst_v_two_stage, 0, uv_plane_size);
1697   memset(dst_v_opt, 0, uv_plane_size);
1698 
1699   DetileSplitUVPlane(orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2,
1700                      dst_v_opt, (benchmark_width_ + 1) / 2, benchmark_width_,
1701                      benchmark_height_, 16);
1702 
1703   // Benchmark 2 step conversion for comparison.
1704   for (j = 0; j < benchmark_iterations_; j++) {
1705     DetilePlane(orig_uv, orig_width, detiled_uv, benchmark_width_,
1706                 benchmark_width_, benchmark_height_, 16);
1707     SplitUVPlane(detiled_uv, orig_width, dst_u_two_stage,
1708                  (benchmark_width_ + 1) / 2, dst_v_two_stage,
1709                  (benchmark_width_ + 1) / 2, (benchmark_width_ + 1) / 2,
1710                  benchmark_height_);
1711   }
1712 
1713   for (i = 0; i < uv_plane_size; ++i) {
1714     EXPECT_EQ(dst_u_two_stage[i], dst_u_opt[i]);
1715     EXPECT_EQ(dst_v_two_stage[i], dst_v_opt[i]);
1716   }
1717 
1718   free_aligned_buffer_page_end(orig_uv);
1719   free_aligned_buffer_page_end(detiled_uv);
1720   free_aligned_buffer_page_end(dst_u_two_stage);
1721   free_aligned_buffer_page_end(dst_u_opt);
1722   free_aligned_buffer_page_end(dst_v_two_stage);
1723   free_aligned_buffer_page_end(dst_v_opt);
1724 }
1725 
// Runs DetileSplitUVPlane once with CPU features masked by
// disable_cpu_flags_ and benchmark_iterations_ times with
// benchmark_cpu_info_, and expects the U and V outputs of both to match.
TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) {
  int i;
  const int width = benchmark_width_;
  const int height = benchmark_height_;

  // orig is tiled, so its dimensions are rounded up to a multiple of 16.
  const int tiled_width = (width + 15) & ~15;
  const int tiled_height = (height + 15) & ~15;
  const int tiled_bytes = tiled_width * tiled_height;
  // Each of the U and V planes is (width + 1) / 2 bytes wide.
  const int half_width = (width + 1) / 2;
  const int uv_plane_size = half_width * height;
  align_buffer_page_end(orig_uv, tiled_bytes);
  align_buffer_page_end(dst_u_c, uv_plane_size);
  align_buffer_page_end(dst_u_opt, uv_plane_size);
  align_buffer_page_end(dst_v_c, uv_plane_size);
  align_buffer_page_end(dst_v_opt, uv_plane_size);

  MemRandomize(orig_uv, tiled_bytes);
  memset(dst_u_c, 0, uv_plane_size);
  memset(dst_u_opt, 0, uv_plane_size);
  memset(dst_v_c, 0, uv_plane_size);
  memset(dst_v_opt, 0, uv_plane_size);

  // Disable all optimizations.
  MaskCpuFlags(disable_cpu_flags_);
  DetileSplitUVPlane(orig_uv, tiled_width, dst_u_c, half_width, dst_v_c,
                     half_width, width, height, 16);

  // Enable optimizations.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    DetileSplitUVPlane(orig_uv, tiled_width, dst_u_opt, half_width, dst_v_opt,
                       half_width, width, height, 16);
  }

  for (i = 0; i < uv_plane_size; ++i) {
    EXPECT_EQ(dst_u_c[i], dst_u_opt[i]);
    EXPECT_EQ(dst_v_c[i], dst_v_opt[i]);
  }

  free_aligned_buffer_page_end(orig_uv);
  free_aligned_buffer_page_end(dst_u_c);
  free_aligned_buffer_page_end(dst_u_opt);
  free_aligned_buffer_page_end(dst_v_c);
  free_aligned_buffer_page_end(dst_v_opt);
}
1773 
TestMultiply(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1774 static int TestMultiply(int width,
1775                         int height,
1776                         int benchmark_iterations,
1777                         int disable_cpu_flags,
1778                         int benchmark_cpu_info,
1779                         int invert,
1780                         int off) {
1781   if (width < 1) {
1782     width = 1;
1783   }
1784   const int kBpp = 4;
1785   const int kStride = width * kBpp;
1786   align_buffer_page_end(src_argb_a, kStride * height + off);
1787   align_buffer_page_end(src_argb_b, kStride * height + off);
1788   align_buffer_page_end(dst_argb_c, kStride * height);
1789   align_buffer_page_end(dst_argb_opt, kStride * height);
1790   for (int i = 0; i < kStride * height; ++i) {
1791     src_argb_a[i + off] = (fastrand() & 0xff);
1792     src_argb_b[i + off] = (fastrand() & 0xff);
1793   }
1794   memset(dst_argb_c, 0, kStride * height);
1795   memset(dst_argb_opt, 0, kStride * height);
1796 
1797   MaskCpuFlags(disable_cpu_flags);
1798   ARGBMultiply(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
1799                kStride, width, invert * height);
1800   MaskCpuFlags(benchmark_cpu_info);
1801   for (int i = 0; i < benchmark_iterations; ++i) {
1802     ARGBMultiply(src_argb_a + off, kStride, src_argb_b + off, kStride,
1803                  dst_argb_opt, kStride, width, invert * height);
1804   }
1805   int max_diff = 0;
1806   for (int i = 0; i < kStride * height; ++i) {
1807     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1808                        static_cast<int>(dst_argb_opt[i]));
1809     if (abs_diff > max_diff) {
1810       max_diff = abs_diff;
1811     }
1812   }
1813   free_aligned_buffer_page_end(src_argb_a);
1814   free_aligned_buffer_page_end(src_argb_b);
1815   free_aligned_buffer_page_end(dst_argb_c);
1816   free_aligned_buffer_page_end(dst_argb_opt);
1817   return max_diff;
1818 }
1819 
// ARGBMultiply at benchmark width + 1, non-inverted, no source offset;
// the two passes may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBMultiply_Any) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff = TestMultiply(benchmark_width_ + 1, benchmark_height_,
                                    benchmark_iterations_, disable_cpu_flags_,
                                    benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(max_diff, 1);
}
1826 
// ARGBMultiply with both source pointers offset by one byte;
// the two passes may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBMultiply_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int max_diff = TestMultiply(benchmark_width_, benchmark_height_,
                                    benchmark_iterations_, disable_cpu_flags_,
                                    benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(max_diff, 1);
}
1833 
// ARGBMultiply called with a negated height (invert = -1);
// the two passes may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBMultiply_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int max_diff = TestMultiply(benchmark_width_, benchmark_height_,
                                    benchmark_iterations_, disable_cpu_flags_,
                                    benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(max_diff, 1);
}
1840 
// ARGBMultiply at the benchmark size, non-inverted, no source offset;
// the two passes may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBMultiply_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff = TestMultiply(benchmark_width_, benchmark_height_,
                                    benchmark_iterations_, disable_cpu_flags_,
                                    benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(max_diff, 1);
}
1847 
TestAdd(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1848 static int TestAdd(int width,
1849                    int height,
1850                    int benchmark_iterations,
1851                    int disable_cpu_flags,
1852                    int benchmark_cpu_info,
1853                    int invert,
1854                    int off) {
1855   if (width < 1) {
1856     width = 1;
1857   }
1858   const int kBpp = 4;
1859   const int kStride = width * kBpp;
1860   align_buffer_page_end(src_argb_a, kStride * height + off);
1861   align_buffer_page_end(src_argb_b, kStride * height + off);
1862   align_buffer_page_end(dst_argb_c, kStride * height);
1863   align_buffer_page_end(dst_argb_opt, kStride * height);
1864   for (int i = 0; i < kStride * height; ++i) {
1865     src_argb_a[i + off] = (fastrand() & 0xff);
1866     src_argb_b[i + off] = (fastrand() & 0xff);
1867   }
1868   memset(dst_argb_c, 0, kStride * height);
1869   memset(dst_argb_opt, 0, kStride * height);
1870 
1871   MaskCpuFlags(disable_cpu_flags);
1872   ARGBAdd(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
1873           kStride, width, invert * height);
1874   MaskCpuFlags(benchmark_cpu_info);
1875   for (int i = 0; i < benchmark_iterations; ++i) {
1876     ARGBAdd(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_opt,
1877             kStride, width, invert * height);
1878   }
1879   int max_diff = 0;
1880   for (int i = 0; i < kStride * height; ++i) {
1881     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1882                        static_cast<int>(dst_argb_opt[i]));
1883     if (abs_diff > max_diff) {
1884       max_diff = abs_diff;
1885     }
1886   }
1887   free_aligned_buffer_page_end(src_argb_a);
1888   free_aligned_buffer_page_end(src_argb_b);
1889   free_aligned_buffer_page_end(dst_argb_c);
1890   free_aligned_buffer_page_end(dst_argb_opt);
1891   return max_diff;
1892 }
1893 
// ARGBAdd at benchmark width + 1, non-inverted, no source offset;
// the two passes may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBAdd_Any) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff = TestAdd(benchmark_width_ + 1, benchmark_height_,
                               benchmark_iterations_, disable_cpu_flags_,
                               benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(max_diff, 1);
}
1900 
// ARGBAdd with both source pointers offset by one byte;
// the two passes may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBAdd_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int max_diff = TestAdd(benchmark_width_, benchmark_height_,
                               benchmark_iterations_, disable_cpu_flags_,
                               benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(max_diff, 1);
}
1907 
// ARGBAdd called with a negated height (invert = -1);
// the two passes may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBAdd_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int max_diff = TestAdd(benchmark_width_, benchmark_height_,
                               benchmark_iterations_, disable_cpu_flags_,
                               benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(max_diff, 1);
}
1914 
// ARGBAdd at the benchmark size, non-inverted, no source offset;
// the two passes may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBAdd_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff = TestAdd(benchmark_width_, benchmark_height_,
                               benchmark_iterations_, disable_cpu_flags_,
                               benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(max_diff, 1);
}
1921 
TestSubtract(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1922 static int TestSubtract(int width,
1923                         int height,
1924                         int benchmark_iterations,
1925                         int disable_cpu_flags,
1926                         int benchmark_cpu_info,
1927                         int invert,
1928                         int off) {
1929   if (width < 1) {
1930     width = 1;
1931   }
1932   const int kBpp = 4;
1933   const int kStride = width * kBpp;
1934   align_buffer_page_end(src_argb_a, kStride * height + off);
1935   align_buffer_page_end(src_argb_b, kStride * height + off);
1936   align_buffer_page_end(dst_argb_c, kStride * height);
1937   align_buffer_page_end(dst_argb_opt, kStride * height);
1938   for (int i = 0; i < kStride * height; ++i) {
1939     src_argb_a[i + off] = (fastrand() & 0xff);
1940     src_argb_b[i + off] = (fastrand() & 0xff);
1941   }
1942   memset(dst_argb_c, 0, kStride * height);
1943   memset(dst_argb_opt, 0, kStride * height);
1944 
1945   MaskCpuFlags(disable_cpu_flags);
1946   ARGBSubtract(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
1947                kStride, width, invert * height);
1948   MaskCpuFlags(benchmark_cpu_info);
1949   for (int i = 0; i < benchmark_iterations; ++i) {
1950     ARGBSubtract(src_argb_a + off, kStride, src_argb_b + off, kStride,
1951                  dst_argb_opt, kStride, width, invert * height);
1952   }
1953   int max_diff = 0;
1954   for (int i = 0; i < kStride * height; ++i) {
1955     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1956                        static_cast<int>(dst_argb_opt[i]));
1957     if (abs_diff > max_diff) {
1958       max_diff = abs_diff;
1959     }
1960   }
1961   free_aligned_buffer_page_end(src_argb_a);
1962   free_aligned_buffer_page_end(src_argb_b);
1963   free_aligned_buffer_page_end(dst_argb_c);
1964   free_aligned_buffer_page_end(dst_argb_opt);
1965   return max_diff;
1966 }
1967 
// ARGBSubtract at benchmark width + 1, non-inverted, no source offset;
// the two passes may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBSubtract_Any) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff = TestSubtract(benchmark_width_ + 1, benchmark_height_,
                                    benchmark_iterations_, disable_cpu_flags_,
                                    benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(max_diff, 1);
}
1974 
// ARGBSubtract with both source pointers offset by one byte;
// the two passes may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBSubtract_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int max_diff = TestSubtract(benchmark_width_, benchmark_height_,
                                    benchmark_iterations_, disable_cpu_flags_,
                                    benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(max_diff, 1);
}
1981 
// ARGBSubtract called with a negated height (invert = -1);
// the two passes may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBSubtract_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int max_diff = TestSubtract(benchmark_width_, benchmark_height_,
                                    benchmark_iterations_, disable_cpu_flags_,
                                    benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(max_diff, 1);
}
1988 
// ARGBSubtract at the benchmark size, non-inverted, no source offset;
// the two passes may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBSubtract_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff = TestSubtract(benchmark_width_, benchmark_height_,
                                    benchmark_iterations_, disable_cpu_flags_,
                                    benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(max_diff, 1);
}
1995 
TestSobel(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1996 static int TestSobel(int width,
1997                      int height,
1998                      int benchmark_iterations,
1999                      int disable_cpu_flags,
2000                      int benchmark_cpu_info,
2001                      int invert,
2002                      int off) {
2003   if (width < 1) {
2004     width = 1;
2005   }
2006   const int kBpp = 4;
2007   const int kStride = width * kBpp;
2008   align_buffer_page_end(src_argb_a, kStride * height + off);
2009   align_buffer_page_end(dst_argb_c, kStride * height);
2010   align_buffer_page_end(dst_argb_opt, kStride * height);
2011   memset(src_argb_a, 0, kStride * height + off);
2012   for (int i = 0; i < kStride * height; ++i) {
2013     src_argb_a[i + off] = (fastrand() & 0xff);
2014   }
2015   memset(dst_argb_c, 0, kStride * height);
2016   memset(dst_argb_opt, 0, kStride * height);
2017 
2018   MaskCpuFlags(disable_cpu_flags);
2019   ARGBSobel(src_argb_a + off, kStride, dst_argb_c, kStride, width,
2020             invert * height);
2021   MaskCpuFlags(benchmark_cpu_info);
2022   for (int i = 0; i < benchmark_iterations; ++i) {
2023     ARGBSobel(src_argb_a + off, kStride, dst_argb_opt, kStride, width,
2024               invert * height);
2025   }
2026   int max_diff = 0;
2027   for (int i = 0; i < kStride * height; ++i) {
2028     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
2029                        static_cast<int>(dst_argb_opt[i]));
2030     if (abs_diff > max_diff) {
2031       max_diff = abs_diff;
2032     }
2033   }
2034   free_aligned_buffer_page_end(src_argb_a);
2035   free_aligned_buffer_page_end(dst_argb_c);
2036   free_aligned_buffer_page_end(dst_argb_opt);
2037   return max_diff;
2038 }
2039 
// ARGBSobel at benchmark width + 1, non-inverted, no source offset;
// both passes must produce identical output.
TEST_F(LibYUVPlanarTest, ARGBSobel_Any) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff = TestSobel(benchmark_width_ + 1, benchmark_height_,
                                 benchmark_iterations_, disable_cpu_flags_,
                                 benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(0, max_diff);
}
2046 
// ARGBSobel with the source pointer offset by one byte;
// both passes must produce identical output.
TEST_F(LibYUVPlanarTest, ARGBSobel_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int max_diff = TestSobel(benchmark_width_, benchmark_height_,
                                 benchmark_iterations_, disable_cpu_flags_,
                                 benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(0, max_diff);
}
2053 
// ARGBSobel called with a negated height (invert = -1);
// both passes must produce identical output.
TEST_F(LibYUVPlanarTest, ARGBSobel_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int max_diff = TestSobel(benchmark_width_, benchmark_height_,
                                 benchmark_iterations_, disable_cpu_flags_,
                                 benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(0, max_diff);
}
2060 
// ARGBSobel at the benchmark size, non-inverted, no source offset;
// both passes must produce identical output.
TEST_F(LibYUVPlanarTest, ARGBSobel_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff = TestSobel(benchmark_width_, benchmark_height_,
                                 benchmark_iterations_, disable_cpu_flags_,
                                 benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(0, max_diff);
}
2067 
TestSobelToPlane(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)2068 static int TestSobelToPlane(int width,
2069                             int height,
2070                             int benchmark_iterations,
2071                             int disable_cpu_flags,
2072                             int benchmark_cpu_info,
2073                             int invert,
2074                             int off) {
2075   if (width < 1) {
2076     width = 1;
2077   }
2078   const int kSrcBpp = 4;
2079   const int kDstBpp = 1;
2080   const int kSrcStride = (width * kSrcBpp + 15) & ~15;
2081   const int kDstStride = (width * kDstBpp + 15) & ~15;
2082   align_buffer_page_end(src_argb_a, kSrcStride * height + off);
2083   align_buffer_page_end(dst_argb_c, kDstStride * height);
2084   align_buffer_page_end(dst_argb_opt, kDstStride * height);
2085   memset(src_argb_a, 0, kSrcStride * height + off);
2086   for (int i = 0; i < kSrcStride * height; ++i) {
2087     src_argb_a[i + off] = (fastrand() & 0xff);
2088   }
2089   memset(dst_argb_c, 0, kDstStride * height);
2090   memset(dst_argb_opt, 0, kDstStride * height);
2091 
2092   MaskCpuFlags(disable_cpu_flags);
2093   ARGBSobelToPlane(src_argb_a + off, kSrcStride, dst_argb_c, kDstStride, width,
2094                    invert * height);
2095   MaskCpuFlags(benchmark_cpu_info);
2096   for (int i = 0; i < benchmark_iterations; ++i) {
2097     ARGBSobelToPlane(src_argb_a + off, kSrcStride, dst_argb_opt, kDstStride,
2098                      width, invert * height);
2099   }
2100   int max_diff = 0;
2101   for (int i = 0; i < kDstStride * height; ++i) {
2102     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
2103                        static_cast<int>(dst_argb_opt[i]));
2104     if (abs_diff > max_diff) {
2105       max_diff = abs_diff;
2106     }
2107   }
2108   free_aligned_buffer_page_end(src_argb_a);
2109   free_aligned_buffer_page_end(dst_argb_c);
2110   free_aligned_buffer_page_end(dst_argb_opt);
2111   return max_diff;
2112 }
2113 
// ARGBSobelToPlane at benchmark width + 1, non-inverted, no source offset;
// both passes must produce identical output.
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Any) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff =
      TestSobelToPlane(benchmark_width_ + 1, benchmark_height_,
                       benchmark_iterations_, disable_cpu_flags_,
                       benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(0, max_diff);
}
2120 
// ARGBSobelToPlane with the source pointer offset by one byte;
// both passes must produce identical output.
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int max_diff =
      TestSobelToPlane(benchmark_width_, benchmark_height_,
                       benchmark_iterations_, disable_cpu_flags_,
                       benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(0, max_diff);
}
2127 
// ARGBSobelToPlane called with a negated height (invert = -1);
// both passes must produce identical output.
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int max_diff =
      TestSobelToPlane(benchmark_width_, benchmark_height_,
                       benchmark_iterations_, disable_cpu_flags_,
                       benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(0, max_diff);
}
2134 
// ARGBSobelToPlane at the benchmark size, non-inverted, no source offset;
// both passes must produce identical output.
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff =
      TestSobelToPlane(benchmark_width_, benchmark_height_,
                       benchmark_iterations_, disable_cpu_flags_,
                       benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(0, max_diff);
}
2141 
TestSobelXY(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)2142 static int TestSobelXY(int width,
2143                        int height,
2144                        int benchmark_iterations,
2145                        int disable_cpu_flags,
2146                        int benchmark_cpu_info,
2147                        int invert,
2148                        int off) {
2149   if (width < 1) {
2150     width = 1;
2151   }
2152   const int kBpp = 4;
2153   const int kStride = width * kBpp;
2154   align_buffer_page_end(src_argb_a, kStride * height + off);
2155   align_buffer_page_end(dst_argb_c, kStride * height);
2156   align_buffer_page_end(dst_argb_opt, kStride * height);
2157   memset(src_argb_a, 0, kStride * height + off);
2158   for (int i = 0; i < kStride * height; ++i) {
2159     src_argb_a[i + off] = (fastrand() & 0xff);
2160   }
2161   memset(dst_argb_c, 0, kStride * height);
2162   memset(dst_argb_opt, 0, kStride * height);
2163 
2164   MaskCpuFlags(disable_cpu_flags);
2165   ARGBSobelXY(src_argb_a + off, kStride, dst_argb_c, kStride, width,
2166               invert * height);
2167   MaskCpuFlags(benchmark_cpu_info);
2168   for (int i = 0; i < benchmark_iterations; ++i) {
2169     ARGBSobelXY(src_argb_a + off, kStride, dst_argb_opt, kStride, width,
2170                 invert * height);
2171   }
2172   int max_diff = 0;
2173   for (int i = 0; i < kStride * height; ++i) {
2174     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
2175                        static_cast<int>(dst_argb_opt[i]));
2176     if (abs_diff > max_diff) {
2177       max_diff = abs_diff;
2178     }
2179   }
2180   free_aligned_buffer_page_end(src_argb_a);
2181   free_aligned_buffer_page_end(dst_argb_c);
2182   free_aligned_buffer_page_end(dst_argb_opt);
2183   return max_diff;
2184 }
2185 
// ARGBSobelXY at benchmark width + 1, non-inverted, no source offset;
// both passes must produce identical output.
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Any) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff = TestSobelXY(benchmark_width_ + 1, benchmark_height_,
                                   benchmark_iterations_, disable_cpu_flags_,
                                   benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(0, max_diff);
}
2192 
// ARGBSobelXY with the source pointer offset by one byte;
// both passes must produce identical output.
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int max_diff = TestSobelXY(benchmark_width_, benchmark_height_,
                                   benchmark_iterations_, disable_cpu_flags_,
                                   benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(0, max_diff);
}
2199 
// ARGBSobelXY called with a negated height (invert = -1);
// both passes must produce identical output.
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int max_diff = TestSobelXY(benchmark_width_, benchmark_height_,
                                   benchmark_iterations_, disable_cpu_flags_,
                                   benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(0, max_diff);
}
2206 
// ARGBSobelXY at the benchmark size, non-inverted, no source offset;
// both passes must produce identical output.
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff = TestSobelXY(benchmark_width_, benchmark_height_,
                                   benchmark_iterations_, disable_cpu_flags_,
                                   benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(0, max_diff);
}
2213 
TestBlur(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off,int radius)2214 static int TestBlur(int width,
2215                     int height,
2216                     int benchmark_iterations,
2217                     int disable_cpu_flags,
2218                     int benchmark_cpu_info,
2219                     int invert,
2220                     int off,
2221                     int radius) {
2222   if (width < 1) {
2223     width = 1;
2224   }
2225   const int kBpp = 4;
2226   const int kStride = width * kBpp;
2227   align_buffer_page_end(src_argb_a, kStride * height + off);
2228   align_buffer_page_end(dst_cumsum, width * height * 16);
2229   align_buffer_page_end(dst_argb_c, kStride * height);
2230   align_buffer_page_end(dst_argb_opt, kStride * height);
2231   for (int i = 0; i < kStride * height; ++i) {
2232     src_argb_a[i + off] = (fastrand() & 0xff);
2233   }
2234   memset(dst_cumsum, 0, width * height * 16);
2235   memset(dst_argb_c, 0, kStride * height);
2236   memset(dst_argb_opt, 0, kStride * height);
2237 
2238   MaskCpuFlags(disable_cpu_flags);
2239   ARGBBlur(src_argb_a + off, kStride, dst_argb_c, kStride,
2240            reinterpret_cast<int32_t*>(dst_cumsum), width * 4, width,
2241            invert * height, radius);
2242   MaskCpuFlags(benchmark_cpu_info);
2243   for (int i = 0; i < benchmark_iterations; ++i) {
2244     ARGBBlur(src_argb_a + off, kStride, dst_argb_opt, kStride,
2245              reinterpret_cast<int32_t*>(dst_cumsum), width * 4, width,
2246              invert * height, radius);
2247   }
2248   int max_diff = 0;
2249   for (int i = 0; i < kStride * height; ++i) {
2250     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
2251                        static_cast<int>(dst_argb_opt[i]));
2252     if (abs_diff > max_diff) {
2253       max_diff = abs_diff;
2254     }
2255   }
2256   free_aligned_buffer_page_end(src_argb_a);
2257   free_aligned_buffer_page_end(dst_cumsum);
2258   free_aligned_buffer_page_end(dst_argb_c);
2259   free_aligned_buffer_page_end(dst_argb_opt);
2260   return max_diff;
2261 }
2262 
// DISABLED_ARM(name) expands to DISABLED_<name> only when DISABLE_SLOW_TESTS
// is defined and the target is neither __x86_64__ nor __i386__; in every
// other configuration it expands to the plain test name.
#if defined(DISABLE_SLOW_TESTS) && !defined(__x86_64__) && !defined(__i386__)
#define DISABLED_ARM(name) DISABLED_##name
#else
#define DISABLED_ARM(name) name
#endif
2268 
// Radius passed to TestBlur by the ARGBBlur_* tests.
static const int kBlurSize = 55;

// ARGBBlur at benchmark width + 1, non-inverted, no source offset;
// the two passes may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Any)) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff = TestBlur(benchmark_width_ + 1, benchmark_height_,
                                benchmark_iterations_, disable_cpu_flags_,
                                benchmark_cpu_info_, kInvert, kOffset,
                                kBlurSize);
  EXPECT_LE(max_diff, 1);
}
2276 
// ARGBBlur (radius kBlurSize) with the source pointer offset by one byte;
// the two passes may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Unaligned)) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int max_diff = TestBlur(benchmark_width_, benchmark_height_,
                                benchmark_iterations_, disable_cpu_flags_,
                                benchmark_cpu_info_, kInvert, kOffset,
                                kBlurSize);
  EXPECT_LE(max_diff, 1);
}
2283 
// ARGBBlur (radius kBlurSize) called with a negated height (invert = -1);
// the two passes may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Invert)) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int max_diff = TestBlur(benchmark_width_, benchmark_height_,
                                benchmark_iterations_, disable_cpu_flags_,
                                benchmark_cpu_info_, kInvert, kOffset,
                                kBlurSize);
  EXPECT_LE(max_diff, 1);
}
2290 
// Calls TestBlur at the benchmark size with trailing arguments (+1, 0,
// kBlurSize) and expects the difference it reports to be at most 1.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Opt)) {
  const int max_diff = TestBlur(benchmark_width_, benchmark_height_,
                                benchmark_iterations_, disable_cpu_flags_,
                                benchmark_cpu_info_, +1, 0, kBlurSize);
  EXPECT_LE(max_diff, 1);
}
2297 
// Value passed as the last argument of TestBlur by the ARGBBlurSmall_* tests.
static const int kBlurSmallSize = 5;

// Calls TestBlur with a width of benchmark_width_ + 1 and kBlurSmallSize, and
// expects the difference it reports to be at most 1.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Any)) {
  const int kWidth = benchmark_width_ + 1;
  const int max_diff =
      TestBlur(kWidth, benchmark_height_, benchmark_iterations_,
               disable_cpu_flags_, benchmark_cpu_info_, +1, 0, kBlurSmallSize);
  EXPECT_LE(max_diff, 1);
}
2305 
// Calls TestBlur at the benchmark size with trailing arguments (+1, 1,
// kBlurSmallSize) and expects the difference it reports to be at most 1.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Unaligned)) {
  const int max_diff = TestBlur(benchmark_width_, benchmark_height_,
                                benchmark_iterations_, disable_cpu_flags_,
                                benchmark_cpu_info_, +1, 1, kBlurSmallSize);
  EXPECT_LE(max_diff, 1);
}
2312 
// Calls TestBlur at the benchmark size with trailing arguments (-1, 0,
// kBlurSmallSize) and expects the difference it reports to be at most 1.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Invert)) {
  const int max_diff = TestBlur(benchmark_width_, benchmark_height_,
                                benchmark_iterations_, disable_cpu_flags_,
                                benchmark_cpu_info_, -1, 0, kBlurSmallSize);
  EXPECT_LE(max_diff, 1);
}
2319 
// Calls TestBlur at the benchmark size with trailing arguments (+1, 0,
// kBlurSmallSize) and expects the difference it reports to be at most 1.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Opt)) {
  const int max_diff = TestBlur(benchmark_width_, benchmark_height_,
                                benchmark_iterations_, disable_cpu_flags_,
                                benchmark_cpu_info_, +1, 0, kBlurSmallSize);
  EXPECT_LE(max_diff, 1);
}
2326 
TEST_F(LibYUVPlanarTest,DISABLED_ARM (TestARGBPolynomial))2327 TEST_F(LibYUVPlanarTest, DISABLED_ARM(TestARGBPolynomial)) {
2328   SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
2329   SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]);
2330   SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]);
2331   memset(orig_pixels, 0, sizeof(orig_pixels));
2332 
2333   SIMD_ALIGNED(static const float kWarmifyPolynomial[16]) = {
2334       0.94230f,  -3.03300f,    -2.92500f,    0.f,  // C0
2335       0.584500f, 1.112000f,    1.535000f,    1.f,  // C1 x
2336       0.001313f, -0.002503f,   -0.004496f,   0.f,  // C2 x * x
2337       0.0f,      0.000006965f, 0.000008781f, 0.f,  // C3 x * x * x
2338   };
2339 
2340   // Test blue
2341   orig_pixels[0][0] = 255u;
2342   orig_pixels[0][1] = 0u;
2343   orig_pixels[0][2] = 0u;
2344   orig_pixels[0][3] = 128u;
2345   // Test green
2346   orig_pixels[1][0] = 0u;
2347   orig_pixels[1][1] = 255u;
2348   orig_pixels[1][2] = 0u;
2349   orig_pixels[1][3] = 0u;
2350   // Test red
2351   orig_pixels[2][0] = 0u;
2352   orig_pixels[2][1] = 0u;
2353   orig_pixels[2][2] = 255u;
2354   orig_pixels[2][3] = 255u;
2355   // Test white
2356   orig_pixels[3][0] = 255u;
2357   orig_pixels[3][1] = 255u;
2358   orig_pixels[3][2] = 255u;
2359   orig_pixels[3][3] = 255u;
2360   // Test color
2361   orig_pixels[4][0] = 16u;
2362   orig_pixels[4][1] = 64u;
2363   orig_pixels[4][2] = 192u;
2364   orig_pixels[4][3] = 224u;
2365   // Do 16 to test asm version.
2366   ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
2367                  &kWarmifyPolynomial[0], 16, 1);
2368   EXPECT_EQ(235u, dst_pixels_opt[0][0]);
2369   EXPECT_EQ(0u, dst_pixels_opt[0][1]);
2370   EXPECT_EQ(0u, dst_pixels_opt[0][2]);
2371   EXPECT_EQ(128u, dst_pixels_opt[0][3]);
2372   EXPECT_EQ(0u, dst_pixels_opt[1][0]);
2373   EXPECT_EQ(233u, dst_pixels_opt[1][1]);
2374   EXPECT_EQ(0u, dst_pixels_opt[1][2]);
2375   EXPECT_EQ(0u, dst_pixels_opt[1][3]);
2376   EXPECT_EQ(0u, dst_pixels_opt[2][0]);
2377   EXPECT_EQ(0u, dst_pixels_opt[2][1]);
2378   EXPECT_EQ(241u, dst_pixels_opt[2][2]);
2379   EXPECT_EQ(255u, dst_pixels_opt[2][3]);
2380   EXPECT_EQ(235u, dst_pixels_opt[3][0]);
2381   EXPECT_EQ(233u, dst_pixels_opt[3][1]);
2382   EXPECT_EQ(241u, dst_pixels_opt[3][2]);
2383   EXPECT_EQ(255u, dst_pixels_opt[3][3]);
2384   EXPECT_EQ(10u, dst_pixels_opt[4][0]);
2385   EXPECT_EQ(59u, dst_pixels_opt[4][1]);
2386   EXPECT_EQ(188u, dst_pixels_opt[4][2]);
2387   EXPECT_EQ(224u, dst_pixels_opt[4][3]);
2388 
2389   for (int i = 0; i < 1280; ++i) {
2390     orig_pixels[i][0] = i;
2391     orig_pixels[i][1] = i / 2;
2392     orig_pixels[i][2] = i / 3;
2393     orig_pixels[i][3] = i;
2394   }
2395 
2396   MaskCpuFlags(disable_cpu_flags_);
2397   ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0,
2398                  &kWarmifyPolynomial[0], 1280, 1);
2399   MaskCpuFlags(benchmark_cpu_info_);
2400 
2401   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
2402     ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
2403                    &kWarmifyPolynomial[0], 1280, 1);
2404   }
2405 
2406   for (int i = 0; i < 1280; ++i) {
2407     EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]);
2408     EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]);
2409     EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]);
2410     EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]);
2411   }
2412 }
2413 
TestHalfFloatPlane(int benchmark_width,int benchmark_height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,float scale,int mask)2414 int TestHalfFloatPlane(int benchmark_width,
2415                        int benchmark_height,
2416                        int benchmark_iterations,
2417                        int disable_cpu_flags,
2418                        int benchmark_cpu_info,
2419                        float scale,
2420                        int mask) {
2421   int i, j;
2422   const int y_plane_size = benchmark_width * benchmark_height * 2;
2423 
2424   align_buffer_page_end(orig_y, y_plane_size * 3);
2425   uint8_t* dst_opt = orig_y + y_plane_size;
2426   uint8_t* dst_c = orig_y + y_plane_size * 2;
2427 
2428   MemRandomize(orig_y, y_plane_size);
2429   memset(dst_c, 0, y_plane_size);
2430   memset(dst_opt, 1, y_plane_size);
2431 
2432   for (i = 0; i < y_plane_size / 2; ++i) {
2433     reinterpret_cast<uint16_t*>(orig_y)[i] &= mask;
2434   }
2435 
2436   // Disable all optimizations.
2437   MaskCpuFlags(disable_cpu_flags);
2438   for (j = 0; j < benchmark_iterations; j++) {
2439     HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y), benchmark_width * 2,
2440                    reinterpret_cast<uint16_t*>(dst_c), benchmark_width * 2,
2441                    scale, benchmark_width, benchmark_height);
2442   }
2443 
2444   // Enable optimizations.
2445   MaskCpuFlags(benchmark_cpu_info);
2446   for (j = 0; j < benchmark_iterations; j++) {
2447     HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y), benchmark_width * 2,
2448                    reinterpret_cast<uint16_t*>(dst_opt), benchmark_width * 2,
2449                    scale, benchmark_width, benchmark_height);
2450   }
2451 
2452   int max_diff = 0;
2453   for (i = 0; i < y_plane_size / 2; ++i) {
2454     int abs_diff =
2455         abs(static_cast<int>(reinterpret_cast<uint16_t*>(dst_c)[i]) -
2456             static_cast<int>(reinterpret_cast<uint16_t*>(dst_opt)[i]));
2457     if (abs_diff > max_diff) {
2458       max_diff = abs_diff;
2459     }
2460   }
2461 
2462   free_aligned_buffer_page_end(orig_y);
2463   return max_diff;
2464 }
2465 
#if defined(__arm__)
// Reads FPSCR with vmrs, ORs in bit 24 (0x1000000) and writes the result back
// with vmsr.  Only compiled when __arm__ is defined.
static void EnableFlushDenormalToZero(void) {
  uint32_t fpscr_value;
  __asm__ __volatile__(
      "vmrs   %0, fpscr         \n"
      "orr    %0, %0, #0x1000000        \n"
      "vmsr   fpscr, %0         \n"
      : "=r"(fpscr_value)
      :
      : "memory");
}
#endif
2476 
// 5 bit exponent with bias of 15 will underflow to a denormal if scale causes
// exponent to be less than 0.  15 - log2(65536) = -1.  This shouldn't normally
// happen since scale is 1/(1<<bits) where bits is 9, 10 or 12.
2480 
// Runs TestHalfFloatPlane with scale 1/65536 and mask 65535 and expects the
// two outputs to be identical.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_denormal) {
// 32 bit arm rounding on denormal case is off by 1 compared to C.
#if defined(__arm__)
  EnableFlushDenormalToZero();
#endif
  const float kScale = 1.0f / 65536.0f;
  const int kMask = 65535;
  const int diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, diff);
}
2491 
// Runs TestHalfFloatPlane with scale 1.0 and mask 65535 and tolerates a
// difference of at most 1 between the two outputs.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_One) {
  const float kScale = 1.0f;
  const int kMask = 65535;
  const int diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_LE(diff, 1);
}
2498 
// Runs TestHalfFloatPlane with scale 1/4096 and mask 65535 and expects the
// two outputs to be identical.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_Opt) {
  const float kScale = 1.0f / 4096.0f;
  const int kMask = 65535;
  const int diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, diff);
}
2505 
// Runs TestHalfFloatPlane with scale 1/1024 and mask 1023 and expects the two
// outputs to be identical.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_Opt) {
  const float kScale = 1.0f / 1024.0f;
  const int kMask = 1023;
  const int diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, diff);
}
2512 
// Runs TestHalfFloatPlane with scale 1/512 and mask 511 and expects the two
// outputs to be identical.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_9bit_Opt) {
  const float kScale = 1.0f / 512.0f;
  const int kMask = 511;
  const int diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, diff);
}
2519 
// Runs TestHalfFloatPlane with scale 1/4096 and mask 4095 and expects the two
// outputs to be identical.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Opt) {
  const float kScale = 1.0f / 4096.0f;
  const int kMask = 4095;
  const int diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, diff);
}
2526 
// Runs TestHalfFloatPlane with scale 1/4095 and mask 4095 and expects the two
// outputs to be identical.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Offby1) {
  const float kScale = 1.0f / 4095.0f;
  const int kMask = 4095;
  const int diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, diff);
}
2533 
// Runs TestHalfFloatPlane with scale 1.0 and mask 2047 and expects the two
// outputs to be identical.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_One) {
  const float kScale = 1.0f;
  const int kMask = 2047;
  const int diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, diff);
}
2540 
// Runs TestHalfFloatPlane with scale 1.0 and mask 4095 and tolerates a
// difference of at most 1 between the two outputs.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_12bit_One) {
  const float kScale = 1.0f;
  const int kMask = 4095;
  const int diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_LE(diff, 1);
}
2547 
TestByteToFloat(int benchmark_width,int benchmark_height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,float scale)2548 float TestByteToFloat(int benchmark_width,
2549                       int benchmark_height,
2550                       int benchmark_iterations,
2551                       int disable_cpu_flags,
2552                       int benchmark_cpu_info,
2553                       float scale) {
2554   int i, j;
2555   const int y_plane_size = benchmark_width * benchmark_height;
2556 
2557   align_buffer_page_end(orig_y, y_plane_size * (1 + 4 + 4));
2558   float* dst_opt = reinterpret_cast<float*>(orig_y + y_plane_size);
2559   float* dst_c = reinterpret_cast<float*>(orig_y + y_plane_size * 5);
2560 
2561   MemRandomize(orig_y, y_plane_size);
2562   memset(dst_c, 0, y_plane_size * 4);
2563   memset(dst_opt, 1, y_plane_size * 4);
2564 
2565   // Disable all optimizations.
2566   MaskCpuFlags(disable_cpu_flags);
2567   ByteToFloat(orig_y, dst_c, scale, y_plane_size);
2568 
2569   // Enable optimizations.
2570   MaskCpuFlags(benchmark_cpu_info);
2571   for (j = 0; j < benchmark_iterations; j++) {
2572     ByteToFloat(orig_y, dst_opt, scale, y_plane_size);
2573   }
2574 
2575   float max_diff = 0;
2576   for (i = 0; i < y_plane_size; ++i) {
2577     float abs_diff = fabs(dst_c[i] - dst_opt[i]);
2578     if (abs_diff > max_diff) {
2579       max_diff = abs_diff;
2580     }
2581   }
2582 
2583   free_aligned_buffer_page_end(orig_y);
2584   return max_diff;
2585 }
2586 
// Runs the TestByteToFloat helper with scale 1.0 and expects the two outputs
// to be exactly equal.
TEST_F(LibYUVPlanarTest, TestByteToFloat) {
  const float kScale = 1.0f;
  const float diff =
      TestByteToFloat(benchmark_width_, benchmark_height_,
                      benchmark_iterations_, disable_cpu_flags_,
                      benchmark_cpu_info_, kScale);
  EXPECT_EQ(0.f, diff);
}
2593 
TEST_F(LibYUVPlanarTest,TestARGBLumaColorTable)2594 TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) {
2595   SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
2596   SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]);
2597   SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]);
2598   memset(orig_pixels, 0, sizeof(orig_pixels));
2599 
2600   align_buffer_page_end(lumacolortable, 32768);
2601   int v = 0;
2602   for (int i = 0; i < 32768; ++i) {
2603     lumacolortable[i] = v;
2604     v += 3;
2605   }
2606   // Test blue
2607   orig_pixels[0][0] = 255u;
2608   orig_pixels[0][1] = 0u;
2609   orig_pixels[0][2] = 0u;
2610   orig_pixels[0][3] = 128u;
2611   // Test green
2612   orig_pixels[1][0] = 0u;
2613   orig_pixels[1][1] = 255u;
2614   orig_pixels[1][2] = 0u;
2615   orig_pixels[1][3] = 0u;
2616   // Test red
2617   orig_pixels[2][0] = 0u;
2618   orig_pixels[2][1] = 0u;
2619   orig_pixels[2][2] = 255u;
2620   orig_pixels[2][3] = 255u;
2621   // Test color
2622   orig_pixels[3][0] = 16u;
2623   orig_pixels[3][1] = 64u;
2624   orig_pixels[3][2] = 192u;
2625   orig_pixels[3][3] = 224u;
2626   // Do 16 to test asm version.
2627   ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
2628                      &lumacolortable[0], 16, 1);
2629   EXPECT_EQ(253u, dst_pixels_opt[0][0]);
2630   EXPECT_EQ(0u, dst_pixels_opt[0][1]);
2631   EXPECT_EQ(0u, dst_pixels_opt[0][2]);
2632   EXPECT_EQ(128u, dst_pixels_opt[0][3]);
2633   EXPECT_EQ(0u, dst_pixels_opt[1][0]);
2634   EXPECT_EQ(253u, dst_pixels_opt[1][1]);
2635   EXPECT_EQ(0u, dst_pixels_opt[1][2]);
2636   EXPECT_EQ(0u, dst_pixels_opt[1][3]);
2637   EXPECT_EQ(0u, dst_pixels_opt[2][0]);
2638   EXPECT_EQ(0u, dst_pixels_opt[2][1]);
2639   EXPECT_EQ(253u, dst_pixels_opt[2][2]);
2640   EXPECT_EQ(255u, dst_pixels_opt[2][3]);
2641   EXPECT_EQ(48u, dst_pixels_opt[3][0]);
2642   EXPECT_EQ(192u, dst_pixels_opt[3][1]);
2643   EXPECT_EQ(64u, dst_pixels_opt[3][2]);
2644   EXPECT_EQ(224u, dst_pixels_opt[3][3]);
2645 
2646   for (int i = 0; i < 1280; ++i) {
2647     orig_pixels[i][0] = i;
2648     orig_pixels[i][1] = i / 2;
2649     orig_pixels[i][2] = i / 3;
2650     orig_pixels[i][3] = i;
2651   }
2652 
2653   MaskCpuFlags(disable_cpu_flags_);
2654   ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0,
2655                      lumacolortable, 1280, 1);
2656   MaskCpuFlags(benchmark_cpu_info_);
2657 
2658   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
2659     ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
2660                        lumacolortable, 1280, 1);
2661   }
2662   for (int i = 0; i < 1280; ++i) {
2663     EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]);
2664     EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]);
2665     EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]);
2666     EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]);
2667   }
2668 
2669   free_aligned_buffer_page_end(lumacolortable);
2670 }
2671 
// Runs ARGBCopyAlpha from a random source into two destinations that start
// with identical random contents - once with disable_cpu_flags_ masked in and
// benchmark_iterations_ times with benchmark_cpu_info_ masked in - and
// expects both destinations to match byte for byte.
TEST_F(LibYUVPlanarTest, TestARGBCopyAlpha) {
  const int kStride = benchmark_width_ * 4;
  const int kSize = benchmark_width_ * benchmark_height_ * 4;
  align_buffer_page_end(orig_pixels, kSize);
  align_buffer_page_end(dst_pixels_opt, kSize);
  align_buffer_page_end(dst_pixels_c, kSize);

  MemRandomize(orig_pixels, kSize);
  MemRandomize(dst_pixels_opt, kSize);
  // Both destinations begin with the same bytes.
  memcpy(dst_pixels_c, dst_pixels_opt, kSize);

  MaskCpuFlags(disable_cpu_flags_);
  ARGBCopyAlpha(orig_pixels, kStride, dst_pixels_c, kStride, benchmark_width_,
                benchmark_height_);

  MaskCpuFlags(benchmark_cpu_info_);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    ARGBCopyAlpha(orig_pixels, kStride, dst_pixels_opt, kStride,
                  benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < kSize; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(dst_pixels_c);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(orig_pixels);
}
2699 
// Runs ARGBExtractAlpha from a random 4 byte per pixel source into two 1 byte
// per pixel destinations that start with identical random contents - once
// with disable_cpu_flags_ masked in and benchmark_iterations_ times with
// benchmark_cpu_info_ masked in - and expects both destinations to match.
TEST_F(LibYUVPlanarTest, TestARGBExtractAlpha) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kSrcStride = benchmark_width_ * 4;
  const int kDstStride = benchmark_width_;
  align_buffer_page_end(src_pixels, kPixels * 4);
  align_buffer_page_end(dst_pixels_opt, kPixels);
  align_buffer_page_end(dst_pixels_c, kPixels);

  MemRandomize(src_pixels, kPixels * 4);
  MemRandomize(dst_pixels_opt, kPixels);
  // Both destinations begin with the same bytes.
  memcpy(dst_pixels_c, dst_pixels_opt, kPixels);

  MaskCpuFlags(disable_cpu_flags_);
  ARGBExtractAlpha(src_pixels, kSrcStride, dst_pixels_c, kDstStride,
                   benchmark_width_, benchmark_height_);

  MaskCpuFlags(benchmark_cpu_info_);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    ARGBExtractAlpha(src_pixels, kSrcStride, dst_pixels_opt, kDstStride,
                     benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < kPixels; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(dst_pixels_c);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(src_pixels);
}
2727 
// Runs ARGBCopyYToAlpha from a random 1 byte per pixel source into two 4 byte
// per pixel destinations that start with identical random contents - once
// with disable_cpu_flags_ masked in and benchmark_iterations_ times with
// benchmark_cpu_info_ masked in - and expects both destinations to match.
TEST_F(LibYUVPlanarTest, TestARGBCopyYToAlpha) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kSrcStride = benchmark_width_;
  const int kDstStride = benchmark_width_ * 4;
  align_buffer_page_end(orig_pixels, kPixels);
  align_buffer_page_end(dst_pixels_opt, kPixels * 4);
  align_buffer_page_end(dst_pixels_c, kPixels * 4);

  MemRandomize(orig_pixels, kPixels);
  MemRandomize(dst_pixels_opt, kPixels * 4);
  // Both destinations begin with the same bytes.
  memcpy(dst_pixels_c, dst_pixels_opt, kPixels * 4);

  MaskCpuFlags(disable_cpu_flags_);
  ARGBCopyYToAlpha(orig_pixels, kSrcStride, dst_pixels_c, kDstStride,
                   benchmark_width_, benchmark_height_);

  MaskCpuFlags(benchmark_cpu_info_);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    ARGBCopyYToAlpha(orig_pixels, kSrcStride, dst_pixels_opt, kDstStride,
                     benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < kPixels * 4; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(dst_pixels_c);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(orig_pixels);
}
2755 
TestARGBRect(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off,int bpp)2756 static int TestARGBRect(int width,
2757                         int height,
2758                         int benchmark_iterations,
2759                         int disable_cpu_flags,
2760                         int benchmark_cpu_info,
2761                         int invert,
2762                         int off,
2763                         int bpp) {
2764   if (width < 1) {
2765     width = 1;
2766   }
2767   const int kStride = width * bpp;
2768   const int kSize = kStride * height;
2769   const uint32_t v32 = fastrand() & (bpp == 4 ? 0xffffffff : 0xff);
2770 
2771   align_buffer_page_end(dst_argb_c, kSize + off);
2772   align_buffer_page_end(dst_argb_opt, kSize + off);
2773 
2774   MemRandomize(dst_argb_c + off, kSize);
2775   memcpy(dst_argb_opt + off, dst_argb_c + off, kSize);
2776 
2777   MaskCpuFlags(disable_cpu_flags);
2778   if (bpp == 4) {
2779     ARGBRect(dst_argb_c + off, kStride, 0, 0, width, invert * height, v32);
2780   } else {
2781     SetPlane(dst_argb_c + off, kStride, width, invert * height, v32);
2782   }
2783 
2784   MaskCpuFlags(benchmark_cpu_info);
2785   for (int i = 0; i < benchmark_iterations; ++i) {
2786     if (bpp == 4) {
2787       ARGBRect(dst_argb_opt + off, kStride, 0, 0, width, invert * height, v32);
2788     } else {
2789       SetPlane(dst_argb_opt + off, kStride, width, invert * height, v32);
2790     }
2791   }
2792   int max_diff = 0;
2793   for (int i = 0; i < kStride * height; ++i) {
2794     int abs_diff = abs(static_cast<int>(dst_argb_c[i + off]) -
2795                        static_cast<int>(dst_argb_opt[i + off]));
2796     if (abs_diff > max_diff) {
2797       max_diff = abs_diff;
2798     }
2799   }
2800   free_aligned_buffer_page_end(dst_argb_c);
2801   free_aligned_buffer_page_end(dst_argb_opt);
2802   return max_diff;
2803 }
2804 
// TestARGBRect with width benchmark_width_ + 1, invert +1, offset 0 and
// 4 bytes per pixel; both outputs must be identical.
TEST_F(LibYUVPlanarTest, ARGBRect_Any) {
  const int kWidth = benchmark_width_ + 1;
  const int kInvert = +1;
  const int kOff = 0;
  const int kBpp = 4;
  const int max_diff = TestARGBRect(
      kWidth, benchmark_height_, benchmark_iterations_, disable_cpu_flags_,
      benchmark_cpu_info_, kInvert, kOff, kBpp);
  EXPECT_EQ(0, max_diff);
}
2811 
// TestARGBRect at the benchmark size with invert +1, offset 1 and 4 bytes per
// pixel; both outputs must be identical.
TEST_F(LibYUVPlanarTest, ARGBRect_Unaligned) {
  const int kInvert = +1;
  const int kOff = 1;
  const int kBpp = 4;
  const int max_diff = TestARGBRect(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff, kBpp);
  EXPECT_EQ(0, max_diff);
}
2818 
// TestARGBRect at the benchmark size with invert -1, offset 0 and 4 bytes per
// pixel; both outputs must be identical.
TEST_F(LibYUVPlanarTest, ARGBRect_Invert) {
  const int kInvert = -1;
  const int kOff = 0;
  const int kBpp = 4;
  const int max_diff = TestARGBRect(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff, kBpp);
  EXPECT_EQ(0, max_diff);
}
2825 
// TestARGBRect at the benchmark size with invert +1, offset 0 and 4 bytes per
// pixel; both outputs must be identical.
TEST_F(LibYUVPlanarTest, ARGBRect_Opt) {
  const int kInvert = +1;
  const int kOff = 0;
  const int kBpp = 4;
  const int max_diff = TestARGBRect(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff, kBpp);
  EXPECT_EQ(0, max_diff);
}
2832 
// TestARGBRect with width benchmark_width_ + 1, invert +1, offset 0 and
// 1 byte per pixel (the SetPlane path); both outputs must be identical.
TEST_F(LibYUVPlanarTest, SetPlane_Any) {
  const int kWidth = benchmark_width_ + 1;
  const int kInvert = +1;
  const int kOff = 0;
  const int kBpp = 1;
  const int max_diff = TestARGBRect(
      kWidth, benchmark_height_, benchmark_iterations_, disable_cpu_flags_,
      benchmark_cpu_info_, kInvert, kOff, kBpp);
  EXPECT_EQ(0, max_diff);
}
2839 
// TestARGBRect at the benchmark size with invert +1, offset 1 and 1 byte per
// pixel (the SetPlane path); both outputs must be identical.
TEST_F(LibYUVPlanarTest, SetPlane_Unaligned) {
  const int kInvert = +1;
  const int kOff = 1;
  const int kBpp = 1;
  const int max_diff = TestARGBRect(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff, kBpp);
  EXPECT_EQ(0, max_diff);
}
2846 
// TestARGBRect at the benchmark size with invert -1, offset 0 and 1 byte per
// pixel (the SetPlane path); both outputs must be identical.
TEST_F(LibYUVPlanarTest, SetPlane_Invert) {
  const int kInvert = -1;
  const int kOff = 0;
  const int kBpp = 1;
  const int max_diff = TestARGBRect(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff, kBpp);
  EXPECT_EQ(0, max_diff);
}
2853 
// TestARGBRect at the benchmark size with invert +1, offset 0 and 1 byte per
// pixel (the SetPlane path); both outputs must be identical.
TEST_F(LibYUVPlanarTest, SetPlane_Opt) {
  const int kInvert = +1;
  const int kOff = 0;
  const int kBpp = 1;
  const int max_diff = TestARGBRect(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff, kBpp);
  EXPECT_EQ(0, max_diff);
}
2860 
// Runs MergeUVPlane on two random single-channel planes - once with
// disable_cpu_flags_ masked in and benchmark_iterations_ times with
// benchmark_cpu_info_ masked in - and expects both 2 byte per pixel outputs
// to match byte for byte.
TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kSrcStride = benchmark_width_;
  const int kDstStride = benchmark_width_ * 2;
  align_buffer_page_end(src_pixels_u, kPixels);
  align_buffer_page_end(src_pixels_v, kPixels);
  align_buffer_page_end(dst_pixels_opt, kPixels * 2);
  align_buffer_page_end(dst_pixels_c, kPixels * 2);

  MemRandomize(src_pixels_u, kPixels);
  MemRandomize(src_pixels_v, kPixels);
  // The destinations start with unrelated random bytes.
  MemRandomize(dst_pixels_opt, kPixels * 2);
  MemRandomize(dst_pixels_c, kPixels * 2);

  MaskCpuFlags(disable_cpu_flags_);
  MergeUVPlane(src_pixels_u, kSrcStride, src_pixels_v, kSrcStride,
               dst_pixels_c, kDstStride, benchmark_width_, benchmark_height_);

  MaskCpuFlags(benchmark_cpu_info_);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    MergeUVPlane(src_pixels_u, kSrcStride, src_pixels_v, kSrcStride,
                 dst_pixels_opt, kDstStride, benchmark_width_,
                 benchmark_height_);
  }

  for (int i = 0; i < kPixels * 2; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(src_pixels_u);
  free_aligned_buffer_page_end(src_pixels_v);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
2894 
2895 // 16 bit channel split and merge
TEST_F(LibYUVPlanarTest,MergeUVPlane_16_Opt)2896 TEST_F(LibYUVPlanarTest, MergeUVPlane_16_Opt) {
2897   const int kPixels = benchmark_width_ * benchmark_height_;
2898   align_buffer_page_end(src_pixels_u, kPixels * 2);
2899   align_buffer_page_end(src_pixels_v, kPixels * 2);
2900   align_buffer_page_end(dst_pixels_opt, kPixels * 2 * 2);
2901   align_buffer_page_end(dst_pixels_c, kPixels * 2 * 2);
2902   MemRandomize(src_pixels_u, kPixels * 2);
2903   MemRandomize(src_pixels_v, kPixels * 2);
2904   MemRandomize(dst_pixels_opt, kPixels * 2 * 2);
2905   MemRandomize(dst_pixels_c, kPixels * 2 * 2);
2906 
2907   MaskCpuFlags(disable_cpu_flags_);
2908   MergeUVPlane_16((const uint16_t*)src_pixels_u, benchmark_width_,
2909                   (const uint16_t*)src_pixels_v, benchmark_width_,
2910                   (uint16_t*)dst_pixels_c, benchmark_width_ * 2,
2911                   benchmark_width_, benchmark_height_, 12);
2912   MaskCpuFlags(benchmark_cpu_info_);
2913 
2914   for (int i = 0; i < benchmark_iterations_; ++i) {
2915     MergeUVPlane_16((const uint16_t*)src_pixels_u, benchmark_width_,
2916                     (const uint16_t*)src_pixels_v, benchmark_width_,
2917                     (uint16_t*)dst_pixels_opt, benchmark_width_ * 2,
2918                     benchmark_width_, benchmark_height_, 12);
2919   }
2920 
2921   for (int i = 0; i < kPixels * 2 * 2; ++i) {
2922     EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
2923   }
2924   free_aligned_buffer_page_end(src_pixels_u);
2925   free_aligned_buffer_page_end(src_pixels_v);
2926   free_aligned_buffer_page_end(dst_pixels_opt);
2927   free_aligned_buffer_page_end(dst_pixels_c);
2928 }
2929 
// Runs SplitUVPlane on a random 2 byte per pixel plane - once with
// disable_cpu_flags_ masked in and benchmark_iterations_ times with
// benchmark_cpu_info_ masked in - and expects each pair of single-channel
// outputs to match byte for byte.
TEST_F(LibYUVPlanarTest, SplitUVPlane_Opt) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kSrcStride = benchmark_width_ * 2;
  const int kDstStride = benchmark_width_;
  align_buffer_page_end(src_pixels, kPixels * 2);
  align_buffer_page_end(dst_pixels_u_c, kPixels);
  align_buffer_page_end(dst_pixels_v_c, kPixels);
  align_buffer_page_end(dst_pixels_u_opt, kPixels);
  align_buffer_page_end(dst_pixels_v_opt, kPixels);

  MemRandomize(src_pixels, kPixels * 2);
  // The destinations start with unrelated random bytes.
  MemRandomize(dst_pixels_u_c, kPixels);
  MemRandomize(dst_pixels_v_c, kPixels);
  MemRandomize(dst_pixels_u_opt, kPixels);
  MemRandomize(dst_pixels_v_opt, kPixels);

  MaskCpuFlags(disable_cpu_flags_);
  SplitUVPlane(src_pixels, kSrcStride, dst_pixels_u_c, kDstStride,
               dst_pixels_v_c, kDstStride, benchmark_width_,
               benchmark_height_);

  MaskCpuFlags(benchmark_cpu_info_);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    SplitUVPlane(src_pixels, kSrcStride, dst_pixels_u_opt, kDstStride,
                 dst_pixels_v_opt, kDstStride, benchmark_width_,
                 benchmark_height_);
  }

  for (int i = 0; i < kPixels; ++i) {
    EXPECT_EQ(dst_pixels_u_c[i], dst_pixels_u_opt[i]);
    EXPECT_EQ(dst_pixels_v_c[i], dst_pixels_v_opt[i]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(dst_pixels_u_c);
  free_aligned_buffer_page_end(dst_pixels_v_c);
  free_aligned_buffer_page_end(dst_pixels_u_opt);
  free_aligned_buffer_page_end(dst_pixels_v_opt);
}
2967 
2968 // 16 bit channel split
TEST_F(LibYUVPlanarTest,SplitUVPlane_16_Opt)2969 TEST_F(LibYUVPlanarTest, SplitUVPlane_16_Opt) {
2970   const int kPixels = benchmark_width_ * benchmark_height_;
2971   align_buffer_page_end(src_pixels, kPixels * 2 * 2);
2972   align_buffer_page_end(dst_pixels_u_c, kPixels * 2);
2973   align_buffer_page_end(dst_pixels_v_c, kPixels * 2);
2974   align_buffer_page_end(dst_pixels_u_opt, kPixels * 2);
2975   align_buffer_page_end(dst_pixels_v_opt, kPixels * 2);
2976   MemRandomize(src_pixels, kPixels * 2 * 2);
2977   MemRandomize(dst_pixels_u_c, kPixels * 2);
2978   MemRandomize(dst_pixels_v_c, kPixels * 2);
2979   MemRandomize(dst_pixels_u_opt, kPixels * 2);
2980   MemRandomize(dst_pixels_v_opt, kPixels * 2);
2981 
2982   MaskCpuFlags(disable_cpu_flags_);
2983   SplitUVPlane_16((const uint16_t*)src_pixels, benchmark_width_ * 2,
2984                   (uint16_t*)dst_pixels_u_c, benchmark_width_,
2985                   (uint16_t*)dst_pixels_v_c, benchmark_width_, benchmark_width_,
2986                   benchmark_height_, 10);
2987   MaskCpuFlags(benchmark_cpu_info_);
2988 
2989   for (int i = 0; i < benchmark_iterations_; ++i) {
2990     SplitUVPlane_16((const uint16_t*)src_pixels, benchmark_width_ * 2,
2991                     (uint16_t*)dst_pixels_u_opt, benchmark_width_,
2992                     (uint16_t*)dst_pixels_v_opt, benchmark_width_,
2993                     benchmark_width_, benchmark_height_, 10);
2994   }
2995 
2996   for (int i = 0; i < kPixels * 2; ++i) {
2997     EXPECT_EQ(dst_pixels_u_c[i], dst_pixels_u_opt[i]);
2998     EXPECT_EQ(dst_pixels_v_c[i], dst_pixels_v_opt[i]);
2999   }
3000   free_aligned_buffer_page_end(src_pixels);
3001   free_aligned_buffer_page_end(dst_pixels_u_c);
3002   free_aligned_buffer_page_end(dst_pixels_v_c);
3003   free_aligned_buffer_page_end(dst_pixels_u_opt);
3004   free_aligned_buffer_page_end(dst_pixels_v_opt);
3005 }
3006 
// Runs SwapUVPlane on a random 2 byte per pixel plane - once with
// disable_cpu_flags_ masked in and benchmark_iterations_ times with
// benchmark_cpu_info_ masked in - and expects both outputs to match byte for
// byte.
TEST_F(LibYUVPlanarTest, SwapUVPlane_Opt) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kStride = benchmark_width_ * 2;
  align_buffer_page_end(src_pixels, kPixels * 2);
  align_buffer_page_end(dst_pixels_opt, kPixels * 2);
  align_buffer_page_end(dst_pixels_c, kPixels * 2);

  MemRandomize(src_pixels, kPixels * 2);
  // The destinations start with unrelated random bytes.
  MemRandomize(dst_pixels_opt, kPixels * 2);
  MemRandomize(dst_pixels_c, kPixels * 2);

  MaskCpuFlags(disable_cpu_flags_);
  SwapUVPlane(src_pixels, kStride, dst_pixels_c, kStride, benchmark_width_,
              benchmark_height_);

  MaskCpuFlags(benchmark_cpu_info_);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    SwapUVPlane(src_pixels, kStride, dst_pixels_opt, kStride,
                benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < kPixels * 2; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3036 
// Benchmarks MergeRGBPlane. A split+merge round trip made under
// disable_cpu_flags_ produces the reference image; the merge repeated under
// benchmark_cpu_info_ must reproduce it byte for byte.
TEST_F(LibYUVPlanarTest, MergeRGBPlane_Opt) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  // Packed RGB uses 3 bytes per pixel; each split plane uses 1.
  const int kPackedBytes = kPixels * 3;
  const int kPackedStride = benchmark_width_ * 3;
  const int kPlaneStride = benchmark_width_;
  align_buffer_page_end(src_pixels, kPackedBytes);
  align_buffer_page_end(tmp_pixels_r, kPixels);
  align_buffer_page_end(tmp_pixels_g, kPixels);
  align_buffer_page_end(tmp_pixels_b, kPixels);
  align_buffer_page_end(dst_pixels_opt, kPackedBytes);
  align_buffer_page_end(dst_pixels_c, kPackedBytes);

  MemRandomize(src_pixels, kPackedBytes);
  MemRandomize(tmp_pixels_r, kPixels);
  MemRandomize(tmp_pixels_g, kPixels);
  MemRandomize(tmp_pixels_b, kPixels);
  MemRandomize(dst_pixels_opt, kPackedBytes);
  MemRandomize(dst_pixels_c, kPackedBytes);

  // Reference: split then merge into dst_pixels_c.
  MaskCpuFlags(disable_cpu_flags_);
  SplitRGBPlane(src_pixels, kPackedStride, tmp_pixels_r, kPlaneStride,
                tmp_pixels_g, kPlaneStride, tmp_pixels_b, kPlaneStride,
                benchmark_width_, benchmark_height_);
  MergeRGBPlane(tmp_pixels_r, kPlaneStride, tmp_pixels_g, kPlaneStride,
                tmp_pixels_b, kPlaneStride, dst_pixels_c, kPackedStride,
                benchmark_width_, benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  // Split once more, then time only the merge.
  SplitRGBPlane(src_pixels, kPackedStride, tmp_pixels_r, kPlaneStride,
                tmp_pixels_g, kPlaneStride, tmp_pixels_b, kPlaneStride,
                benchmark_width_, benchmark_height_);

  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    MergeRGBPlane(tmp_pixels_r, kPlaneStride, tmp_pixels_g, kPlaneStride,
                  tmp_pixels_b, kPlaneStride, dst_pixels_opt, kPackedStride,
                  benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < kPackedBytes; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(tmp_pixels_r);
  free_aligned_buffer_page_end(tmp_pixels_g);
  free_aligned_buffer_page_end(tmp_pixels_b);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3085 
// Benchmarks SplitRGBPlane. A split+merge round trip made under
// disable_cpu_flags_ produces the reference image; the split repeated under
// benchmark_cpu_info_, followed by one merge, must reproduce it byte for byte.
TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  // Packed RGB uses 3 bytes per pixel; each split plane uses 1.
  const int kPackedBytes = kPixels * 3;
  const int kPackedStride = benchmark_width_ * 3;
  const int kPlaneStride = benchmark_width_;
  align_buffer_page_end(src_pixels, kPackedBytes);
  align_buffer_page_end(tmp_pixels_r, kPixels);
  align_buffer_page_end(tmp_pixels_g, kPixels);
  align_buffer_page_end(tmp_pixels_b, kPixels);
  align_buffer_page_end(dst_pixels_opt, kPackedBytes);
  align_buffer_page_end(dst_pixels_c, kPackedBytes);

  MemRandomize(src_pixels, kPackedBytes);
  MemRandomize(tmp_pixels_r, kPixels);
  MemRandomize(tmp_pixels_g, kPixels);
  MemRandomize(tmp_pixels_b, kPixels);
  MemRandomize(dst_pixels_opt, kPackedBytes);
  MemRandomize(dst_pixels_c, kPackedBytes);

  // Reference: split then merge into dst_pixels_c.
  MaskCpuFlags(disable_cpu_flags_);
  SplitRGBPlane(src_pixels, kPackedStride, tmp_pixels_r, kPlaneStride,
                tmp_pixels_g, kPlaneStride, tmp_pixels_b, kPlaneStride,
                benchmark_width_, benchmark_height_);
  MergeRGBPlane(tmp_pixels_r, kPlaneStride, tmp_pixels_g, kPlaneStride,
                tmp_pixels_b, kPlaneStride, dst_pixels_c, kPackedStride,
                benchmark_width_, benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  // Time the split, then merge once so the result can be compared.
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    SplitRGBPlane(src_pixels, kPackedStride, tmp_pixels_r, kPlaneStride,
                  tmp_pixels_g, kPlaneStride, tmp_pixels_b, kPlaneStride,
                  benchmark_width_, benchmark_height_);
  }
  MergeRGBPlane(tmp_pixels_r, kPlaneStride, tmp_pixels_g, kPlaneStride,
                tmp_pixels_b, kPlaneStride, dst_pixels_opt, kPackedStride,
                benchmark_width_, benchmark_height_);

  for (int i = 0; i < kPackedBytes; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(tmp_pixels_r);
  free_aligned_buffer_page_end(tmp_pixels_g);
  free_aligned_buffer_page_end(tmp_pixels_b);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3133 
// Benchmarks MergeARGBPlane with all four planes present. The reference image
// comes from a split+merge round trip under disable_cpu_flags_; the merge
// repeated under benchmark_cpu_info_ must match it byte for byte.
TEST_F(LibYUVPlanarTest, MergeARGBPlane_Opt) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  // Packed ARGB uses 4 bytes per pixel; each split plane uses 1.
  const int kPackedBytes = kPixels * 4;
  const int kPackedStride = benchmark_width_ * 4;
  const int kPlaneStride = benchmark_width_;
  align_buffer_page_end(src_pixels, kPackedBytes);
  align_buffer_page_end(tmp_pixels_r, kPixels);
  align_buffer_page_end(tmp_pixels_g, kPixels);
  align_buffer_page_end(tmp_pixels_b, kPixels);
  align_buffer_page_end(tmp_pixels_a, kPixels);
  align_buffer_page_end(dst_pixels_opt, kPackedBytes);
  align_buffer_page_end(dst_pixels_c, kPackedBytes);

  MemRandomize(src_pixels, kPackedBytes);
  MemRandomize(tmp_pixels_r, kPixels);
  MemRandomize(tmp_pixels_g, kPixels);
  MemRandomize(tmp_pixels_b, kPixels);
  MemRandomize(tmp_pixels_a, kPixels);
  MemRandomize(dst_pixels_opt, kPackedBytes);
  MemRandomize(dst_pixels_c, kPackedBytes);

  // Reference: split then merge into dst_pixels_c.
  MaskCpuFlags(disable_cpu_flags_);
  SplitARGBPlane(src_pixels, kPackedStride, tmp_pixels_r, kPlaneStride,
                 tmp_pixels_g, kPlaneStride, tmp_pixels_b, kPlaneStride,
                 tmp_pixels_a, kPlaneStride, benchmark_width_,
                 benchmark_height_);
  MergeARGBPlane(tmp_pixels_r, kPlaneStride, tmp_pixels_g, kPlaneStride,
                 tmp_pixels_b, kPlaneStride, tmp_pixels_a, kPlaneStride,
                 dst_pixels_c, kPackedStride, benchmark_width_,
                 benchmark_height_);

  // Split once more, then time only the merge.
  MaskCpuFlags(benchmark_cpu_info_);
  SplitARGBPlane(src_pixels, kPackedStride, tmp_pixels_r, kPlaneStride,
                 tmp_pixels_g, kPlaneStride, tmp_pixels_b, kPlaneStride,
                 tmp_pixels_a, kPlaneStride, benchmark_width_,
                 benchmark_height_);

  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    MergeARGBPlane(tmp_pixels_r, kPlaneStride, tmp_pixels_g, kPlaneStride,
                   tmp_pixels_b, kPlaneStride, tmp_pixels_a, kPlaneStride,
                   dst_pixels_opt, kPackedStride, benchmark_width_,
                   benchmark_height_);
  }

  for (int i = 0; i < kPackedBytes; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(tmp_pixels_r);
  free_aligned_buffer_page_end(tmp_pixels_g);
  free_aligned_buffer_page_end(tmp_pixels_b);
  free_aligned_buffer_page_end(tmp_pixels_a);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3187 
// Benchmarks SplitARGBPlane with all four planes present. The reference image
// comes from a split+merge round trip under disable_cpu_flags_; the split
// repeated under benchmark_cpu_info_, followed by one merge, must match it.
TEST_F(LibYUVPlanarTest, SplitARGBPlane_Opt) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  // Packed ARGB uses 4 bytes per pixel; each split plane uses 1.
  const int kPackedBytes = kPixels * 4;
  const int kPackedStride = benchmark_width_ * 4;
  const int kPlaneStride = benchmark_width_;
  align_buffer_page_end(src_pixels, kPackedBytes);
  align_buffer_page_end(tmp_pixels_r, kPixels);
  align_buffer_page_end(tmp_pixels_g, kPixels);
  align_buffer_page_end(tmp_pixels_b, kPixels);
  align_buffer_page_end(tmp_pixels_a, kPixels);
  align_buffer_page_end(dst_pixels_opt, kPackedBytes);
  align_buffer_page_end(dst_pixels_c, kPackedBytes);

  MemRandomize(src_pixels, kPackedBytes);
  MemRandomize(tmp_pixels_r, kPixels);
  MemRandomize(tmp_pixels_g, kPixels);
  MemRandomize(tmp_pixels_b, kPixels);
  MemRandomize(tmp_pixels_a, kPixels);
  MemRandomize(dst_pixels_opt, kPackedBytes);
  MemRandomize(dst_pixels_c, kPackedBytes);

  // Reference: split then merge into dst_pixels_c.
  MaskCpuFlags(disable_cpu_flags_);
  SplitARGBPlane(src_pixels, kPackedStride, tmp_pixels_r, kPlaneStride,
                 tmp_pixels_g, kPlaneStride, tmp_pixels_b, kPlaneStride,
                 tmp_pixels_a, kPlaneStride, benchmark_width_,
                 benchmark_height_);
  MergeARGBPlane(tmp_pixels_r, kPlaneStride, tmp_pixels_g, kPlaneStride,
                 tmp_pixels_b, kPlaneStride, tmp_pixels_a, kPlaneStride,
                 dst_pixels_c, kPackedStride, benchmark_width_,
                 benchmark_height_);

  // Time the split, then merge once so the result can be compared.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    SplitARGBPlane(src_pixels, kPackedStride, tmp_pixels_r, kPlaneStride,
                   tmp_pixels_g, kPlaneStride, tmp_pixels_b, kPlaneStride,
                   tmp_pixels_a, kPlaneStride, benchmark_width_,
                   benchmark_height_);
  }

  MergeARGBPlane(tmp_pixels_r, kPlaneStride, tmp_pixels_g, kPlaneStride,
                 tmp_pixels_b, kPlaneStride, tmp_pixels_a, kPlaneStride,
                 dst_pixels_opt, kPackedStride, benchmark_width_,
                 benchmark_height_);

  for (int i = 0; i < kPackedBytes; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(tmp_pixels_r);
  free_aligned_buffer_page_end(tmp_pixels_g);
  free_aligned_buffer_page_end(tmp_pixels_b);
  free_aligned_buffer_page_end(tmp_pixels_a);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3241 
// Benchmarks MergeARGBPlane with no alpha plane (NULL pointer, stride 0). The
// reference image comes from a split+merge round trip under
// disable_cpu_flags_; the merge repeated under benchmark_cpu_info_ must match.
TEST_F(LibYUVPlanarTest, MergeXRGBPlane_Opt) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  // Packed pixels use 4 bytes each; every split plane uses 1 byte per pixel.
  const int kPackedBytes = kPixels * 4;
  const int kPackedStride = benchmark_width_ * 4;
  const int kPlaneStride = benchmark_width_;
  align_buffer_page_end(src_pixels, kPackedBytes);
  align_buffer_page_end(tmp_pixels_r, kPixels);
  align_buffer_page_end(tmp_pixels_g, kPixels);
  align_buffer_page_end(tmp_pixels_b, kPixels);
  align_buffer_page_end(dst_pixels_opt, kPackedBytes);
  align_buffer_page_end(dst_pixels_c, kPackedBytes);

  MemRandomize(src_pixels, kPackedBytes);
  MemRandomize(tmp_pixels_r, kPixels);
  MemRandomize(tmp_pixels_g, kPixels);
  MemRandomize(tmp_pixels_b, kPixels);
  MemRandomize(dst_pixels_opt, kPackedBytes);
  MemRandomize(dst_pixels_c, kPackedBytes);

  // Reference: split then merge into dst_pixels_c, alpha omitted both ways.
  MaskCpuFlags(disable_cpu_flags_);
  SplitARGBPlane(src_pixels, kPackedStride, tmp_pixels_r, kPlaneStride,
                 tmp_pixels_g, kPlaneStride, tmp_pixels_b, kPlaneStride, NULL,
                 0, benchmark_width_, benchmark_height_);
  MergeARGBPlane(tmp_pixels_r, kPlaneStride, tmp_pixels_g, kPlaneStride,
                 tmp_pixels_b, kPlaneStride, NULL, 0, dst_pixels_c,
                 kPackedStride, benchmark_width_, benchmark_height_);

  // Split once more, then time only the merge.
  MaskCpuFlags(benchmark_cpu_info_);
  SplitARGBPlane(src_pixels, kPackedStride, tmp_pixels_r, kPlaneStride,
                 tmp_pixels_g, kPlaneStride, tmp_pixels_b, kPlaneStride, NULL,
                 0, benchmark_width_, benchmark_height_);

  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    MergeARGBPlane(tmp_pixels_r, kPlaneStride, tmp_pixels_g, kPlaneStride,
                   tmp_pixels_b, kPlaneStride, NULL, 0, dst_pixels_opt,
                   kPackedStride, benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < kPackedBytes; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(tmp_pixels_r);
  free_aligned_buffer_page_end(tmp_pixels_g);
  free_aligned_buffer_page_end(tmp_pixels_b);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3291 
// Benchmarks SplitARGBPlane with no alpha plane (NULL pointer, stride 0). The
// reference image comes from a split+merge round trip under
// disable_cpu_flags_; the split repeated under benchmark_cpu_info_, followed
// by one merge, must match it.
TEST_F(LibYUVPlanarTest, SplitXRGBPlane_Opt) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  // Packed pixels use 4 bytes each; every split plane uses 1 byte per pixel.
  const int kPackedBytes = kPixels * 4;
  const int kPackedStride = benchmark_width_ * 4;
  const int kPlaneStride = benchmark_width_;
  align_buffer_page_end(src_pixels, kPackedBytes);
  align_buffer_page_end(tmp_pixels_r, kPixels);
  align_buffer_page_end(tmp_pixels_g, kPixels);
  align_buffer_page_end(tmp_pixels_b, kPixels);
  align_buffer_page_end(dst_pixels_opt, kPackedBytes);
  align_buffer_page_end(dst_pixels_c, kPackedBytes);

  MemRandomize(src_pixels, kPackedBytes);
  MemRandomize(tmp_pixels_r, kPixels);
  MemRandomize(tmp_pixels_g, kPixels);
  MemRandomize(tmp_pixels_b, kPixels);
  MemRandomize(dst_pixels_opt, kPackedBytes);
  MemRandomize(dst_pixels_c, kPackedBytes);

  // Reference: split then merge into dst_pixels_c, alpha omitted both ways.
  MaskCpuFlags(disable_cpu_flags_);
  SplitARGBPlane(src_pixels, kPackedStride, tmp_pixels_r, kPlaneStride,
                 tmp_pixels_g, kPlaneStride, tmp_pixels_b, kPlaneStride, NULL,
                 0, benchmark_width_, benchmark_height_);
  MergeARGBPlane(tmp_pixels_r, kPlaneStride, tmp_pixels_g, kPlaneStride,
                 tmp_pixels_b, kPlaneStride, NULL, 0, dst_pixels_c,
                 kPackedStride, benchmark_width_, benchmark_height_);

  // Time the split, then merge once so the result can be compared.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    SplitARGBPlane(src_pixels, kPackedStride, tmp_pixels_r, kPlaneStride,
                   tmp_pixels_g, kPlaneStride, tmp_pixels_b, kPlaneStride,
                   NULL, 0, benchmark_width_, benchmark_height_);
  }

  MergeARGBPlane(tmp_pixels_r, kPlaneStride, tmp_pixels_g, kPlaneStride,
                 tmp_pixels_b, kPlaneStride, NULL, 0, dst_pixels_opt,
                 kPackedStride, benchmark_width_, benchmark_height_);

  for (int i = 0; i < kPackedBytes; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(tmp_pixels_r);
  free_aligned_buffer_page_end(tmp_pixels_g);
  free_aligned_buffer_page_end(tmp_pixels_b);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3340 
// Merge 4 channels.
// Defines the test FUNC##Plane_##DEPTH##N. It calls FUNC##Plane on four
// randomized source planes (R, G, B, A) once under disable_cpu_flags_ and
// benchmark_iterations_ times under benchmark_cpu_info_, then requires the two
// destination buffers to match element for element.
//   STYPE/DTYPE  source / destination element types.
//   DEPTH        passed through as the last argument of FUNC##Plane.
//   W1280        image width in pixels.
//   N            suffix appended to the test name.
//   NEG          sign applied to benchmark_height_ (+ or -).
//   OFF          byte offset added to each source pointer.
// The two destinations are pre-filled with different byte values (1 and 2),
// so a call that leaves output unwritten fails the comparison instead of
// passing on matching fill bytes.
#define TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, W1280, N, NEG, OFF)      \
  TEST_F(LibYUVPlanarTest, FUNC##Plane_##DEPTH##N) {                        \
    const int kWidth = W1280;                                               \
    const int kPixels = kWidth * benchmark_height_;                         \
    align_buffer_page_end(src_memory_r, kPixels * sizeof(STYPE) + OFF);     \
    align_buffer_page_end(src_memory_g, kPixels * sizeof(STYPE) + OFF);     \
    align_buffer_page_end(src_memory_b, kPixels * sizeof(STYPE) + OFF);     \
    align_buffer_page_end(src_memory_a, kPixels * sizeof(STYPE) + OFF);     \
    align_buffer_page_end(dst_memory_c, kPixels * 4 * sizeof(DTYPE));       \
    align_buffer_page_end(dst_memory_opt, kPixels * 4 * sizeof(DTYPE));     \
    MemRandomize(src_memory_r, kPixels * sizeof(STYPE) + OFF);              \
    MemRandomize(src_memory_g, kPixels * sizeof(STYPE) + OFF);              \
    MemRandomize(src_memory_b, kPixels * sizeof(STYPE) + OFF);              \
    MemRandomize(src_memory_a, kPixels * sizeof(STYPE) + OFF);              \
    /* Distinct fills: an unwritten destination shows as a mismatch. */     \
    memset(dst_memory_c, 1, kPixels * 4 * sizeof(DTYPE));                   \
    memset(dst_memory_opt, 2, kPixels * 4 * sizeof(DTYPE));                 \
    STYPE* src_pixels_r = reinterpret_cast<STYPE*>(src_memory_r + OFF);     \
    STYPE* src_pixels_g = reinterpret_cast<STYPE*>(src_memory_g + OFF);     \
    STYPE* src_pixels_b = reinterpret_cast<STYPE*>(src_memory_b + OFF);     \
    STYPE* src_pixels_a = reinterpret_cast<STYPE*>(src_memory_a + OFF);     \
    DTYPE* dst_pixels_c = reinterpret_cast<DTYPE*>(dst_memory_c);           \
    DTYPE* dst_pixels_opt = reinterpret_cast<DTYPE*>(dst_memory_opt);       \
    MaskCpuFlags(disable_cpu_flags_);                                       \
    FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b,   \
                kWidth, src_pixels_a, kWidth, dst_pixels_c, kWidth * 4,     \
                kWidth, NEG benchmark_height_, DEPTH);                      \
    MaskCpuFlags(benchmark_cpu_info_);                                      \
    for (int i = 0; i < benchmark_iterations_; ++i) {                       \
      FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \
                  kWidth, src_pixels_a, kWidth, dst_pixels_opt, kWidth * 4, \
                  kWidth, NEG benchmark_height_, DEPTH);                    \
    }                                                                       \
    for (int i = 0; i < kPixels * 4; ++i) {                                 \
      EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);                        \
    }                                                                       \
    free_aligned_buffer_page_end(src_memory_r);                             \
    free_aligned_buffer_page_end(src_memory_g);                             \
    free_aligned_buffer_page_end(src_memory_b);                             \
    free_aligned_buffer_page_end(src_memory_a);                             \
    free_aligned_buffer_page_end(dst_memory_c);                             \
    free_aligned_buffer_page_end(dst_memory_opt);                           \
  }
3384 
// Merge 3 channel RGB into 4 channel XRGB with opaque alpha.
// Defines the test FUNC##Plane_Opaque_##DEPTH##N. It calls FUNC##Plane on
// three randomized source planes (R, G, B) with a NULL alpha plane and stride
// 0, once under disable_cpu_flags_ and benchmark_iterations_ times under
// benchmark_cpu_info_, then requires the two destination buffers to match
// element for element.
//   STYPE/DTYPE  source / destination element types.
//   DEPTH        passed through as the last argument of FUNC##Plane.
//   W1280        image width in pixels.
//   N            suffix appended to the test name.
//   NEG          sign applied to benchmark_height_ (+ or -).
//   OFF          byte offset added to each source pointer.
// The two destinations are pre-filled with different byte values (1 and 2),
// so a call that leaves output unwritten fails the comparison instead of
// passing on matching fill bytes.
#define TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, W1280, N, NEG, OFF)     \
  TEST_F(LibYUVPlanarTest, FUNC##Plane_Opaque_##DEPTH##N) {                 \
    const int kWidth = W1280;                                               \
    const int kPixels = kWidth * benchmark_height_;                         \
    align_buffer_page_end(src_memory_r, kPixels * sizeof(STYPE) + OFF);     \
    align_buffer_page_end(src_memory_g, kPixels * sizeof(STYPE) + OFF);     \
    align_buffer_page_end(src_memory_b, kPixels * sizeof(STYPE) + OFF);     \
    align_buffer_page_end(dst_memory_c, kPixels * 4 * sizeof(DTYPE));       \
    align_buffer_page_end(dst_memory_opt, kPixels * 4 * sizeof(DTYPE));     \
    MemRandomize(src_memory_r, kPixels * sizeof(STYPE) + OFF);              \
    MemRandomize(src_memory_g, kPixels * sizeof(STYPE) + OFF);              \
    MemRandomize(src_memory_b, kPixels * sizeof(STYPE) + OFF);              \
    /* Distinct fills: an unwritten destination shows as a mismatch. */     \
    memset(dst_memory_c, 1, kPixels * 4 * sizeof(DTYPE));                   \
    memset(dst_memory_opt, 2, kPixels * 4 * sizeof(DTYPE));                 \
    STYPE* src_pixels_r = reinterpret_cast<STYPE*>(src_memory_r + OFF);     \
    STYPE* src_pixels_g = reinterpret_cast<STYPE*>(src_memory_g + OFF);     \
    STYPE* src_pixels_b = reinterpret_cast<STYPE*>(src_memory_b + OFF);     \
    DTYPE* dst_pixels_c = reinterpret_cast<DTYPE*>(dst_memory_c);           \
    DTYPE* dst_pixels_opt = reinterpret_cast<DTYPE*>(dst_memory_opt);       \
    MaskCpuFlags(disable_cpu_flags_);                                       \
    FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b,   \
                kWidth, NULL, 0, dst_pixels_c, kWidth * 4, kWidth,          \
                NEG benchmark_height_, DEPTH);                              \
    MaskCpuFlags(benchmark_cpu_info_);                                      \
    for (int i = 0; i < benchmark_iterations_; ++i) {                       \
      FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \
                  kWidth, NULL, 0, dst_pixels_opt, kWidth * 4, kWidth,      \
                  NEG benchmark_height_, DEPTH);                            \
    }                                                                       \
    for (int i = 0; i < kPixels * 4; ++i) {                                 \
      EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);                        \
    }                                                                       \
    free_aligned_buffer_page_end(src_memory_r);                             \
    free_aligned_buffer_page_end(src_memory_g);                             \
    free_aligned_buffer_page_end(src_memory_b);                             \
    free_aligned_buffer_page_end(dst_memory_c);                             \
    free_aligned_buffer_page_end(dst_memory_opt);                           \
  }
3424 
3425 #define TESTQPLANARTOP(FUNC, STYPE, DTYPE, DEPTH)                              \
3426   TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_ + 1, _Any, +, 0) \
3427   TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Unaligned, +,  \
3428                   2)                                                           \
3429   TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Invert, -, 0)  \
3430   TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Opt, +, 0)     \
3431   TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_ + 1, _Any, +,   \
3432                    0)                                                          \
3433   TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Unaligned, +, \
3434                    2)                                                          \
3435   TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Invert, -, 0) \
3436   TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Opt, +, 0)
3437 
3438 TESTQPLANARTOP(MergeAR64, uint16_t, uint16_t, 10)
3439 TESTQPLANARTOP(MergeAR64, uint16_t, uint16_t, 12)
3440 TESTQPLANARTOP(MergeAR64, uint16_t, uint16_t, 16)
3441 TESTQPLANARTOP(MergeARGB16To8, uint16_t, uint8_t, 10)
3442 TESTQPLANARTOP(MergeARGB16To8, uint16_t, uint8_t, 12)
3443 TESTQPLANARTOP(MergeARGB16To8, uint16_t, uint8_t, 16)
3444 
// Defines the test FUNC##Plane_##DEPTH##N for a 3-plane merge. It calls
// FUNC##Plane on randomized R, G and B planes once under disable_cpu_flags_
// and benchmark_iterations_ times under benchmark_cpu_info_, then requires the
// two destination buffers to match element for element.
//   STYPE/DTYPE  source / destination element types.
//   DEPTH        passed through as the last argument of FUNC##Plane.
//   W1280        image width in pixels.
//   N            suffix appended to the test name.
//   NEG          sign applied to benchmark_height_ (+ or -).
//   OFF          byte offset added to each source pointer.
#define TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, W1280, N, NEG, OFF)      \
  TEST_F(LibYUVPlanarTest, FUNC##Plane_##DEPTH##N) {                        \
    const int kWidth = W1280;                                               \
    const int kPixels = kWidth * benchmark_height_;                         \
    const size_t kSrcBytes = kPixels * sizeof(STYPE) + OFF;                 \
    const size_t kDstBytes = kPixels * 4 * sizeof(DTYPE);                   \
    align_buffer_page_end(src_memory_r, kSrcBytes);                         \
    align_buffer_page_end(src_memory_g, kSrcBytes);                         \
    align_buffer_page_end(src_memory_b, kSrcBytes);                         \
    align_buffer_page_end(dst_memory_c, kDstBytes);                         \
    align_buffer_page_end(dst_memory_opt, kDstBytes);                       \
    MemRandomize(src_memory_r, kSrcBytes);                                  \
    MemRandomize(src_memory_g, kSrcBytes);                                  \
    MemRandomize(src_memory_b, kSrcBytes);                                  \
    STYPE* src_pixels_r = reinterpret_cast<STYPE*>(src_memory_r + OFF);     \
    STYPE* src_pixels_g = reinterpret_cast<STYPE*>(src_memory_g + OFF);     \
    STYPE* src_pixels_b = reinterpret_cast<STYPE*>(src_memory_b + OFF);     \
    DTYPE* dst_pixels_c = reinterpret_cast<DTYPE*>(dst_memory_c);           \
    DTYPE* dst_pixels_opt = reinterpret_cast<DTYPE*>(dst_memory_opt);       \
    /* The destinations start with different fill bytes (1 and 2). */       \
    memset(dst_pixels_c, 1, kDstBytes);                                     \
    memset(dst_pixels_opt, 2, kDstBytes);                                   \
    MaskCpuFlags(disable_cpu_flags_);                                       \
    FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b,   \
                kWidth, dst_pixels_c, kWidth * 4, kWidth,                   \
                NEG benchmark_height_, DEPTH);                              \
    MaskCpuFlags(benchmark_cpu_info_);                                      \
    for (int pass = 0; pass < benchmark_iterations_; ++pass) {              \
      FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \
                  kWidth, dst_pixels_opt, kWidth * 4, kWidth,               \
                  NEG benchmark_height_, DEPTH);                            \
    }                                                                       \
    for (int i = 0; i < kPixels * 4; ++i) {                                 \
      EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);                        \
    }                                                                       \
    free_aligned_buffer_page_end(src_memory_r);                             \
    free_aligned_buffer_page_end(src_memory_g);                             \
    free_aligned_buffer_page_end(src_memory_b);                             \
    free_aligned_buffer_page_end(dst_memory_c);                             \
    free_aligned_buffer_page_end(dst_memory_opt);                           \
  }
3483 
3484 #define TESTTPLANARTOP(FUNC, STYPE, DTYPE, DEPTH)                              \
3485   TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_ + 1, _Any, +, 0) \
3486   TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Unaligned, +,  \
3487                   2)                                                           \
3488   TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Invert, -, 0)  \
3489   TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Opt, +, 0)
3490 
3491 TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 10)
3492 TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 12)
3493 TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 16)
3494 
3495 // TODO(fbarchard): improve test for platforms and cpu detect
3496 #ifdef HAS_MERGEUVROW_16_AVX2
// Compares MergeUVRow_16_AVX2 against MergeUVRow_16_C on random input. When
// TestCpuFlag(kCpuHasAVX2) reports no AVX2, the C row function is used for
// the timed loop as well.
TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) {
  // Pixel count rounded up to the next multiple of 16.
  const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
  const int kPlaneBytes = kPixels * 2;    // one plane of 16-bit samples
  const int kMergedBytes = kPixels * 2 * 2;  // both planes interleaved
  // NOTE(review): fourth row-function argument; presumably the bit depth -
  // confirm against row.h.
  const int kDepth = 16;

  align_buffer_page_end(src_pixels_u, kPlaneBytes);
  align_buffer_page_end(src_pixels_v, kPlaneBytes);
  align_buffer_page_end(dst_pixels_uv_opt, kMergedBytes);
  align_buffer_page_end(dst_pixels_uv_c, kMergedBytes);

  MemRandomize(src_pixels_u, kPlaneBytes);
  MemRandomize(src_pixels_v, kPlaneBytes);
  // The destinations start with different fill bytes (0 and 1).
  memset(dst_pixels_uv_opt, 0, kMergedBytes);
  memset(dst_pixels_uv_c, 1, kMergedBytes);

  // 16-bit views of the byte buffers.
  const uint16_t* src_u16 = reinterpret_cast<const uint16_t*>(src_pixels_u);
  const uint16_t* src_v16 = reinterpret_cast<const uint16_t*>(src_pixels_v);
  uint16_t* dst_uv16_c = reinterpret_cast<uint16_t*>(dst_pixels_uv_c);
  uint16_t* dst_uv16_opt = reinterpret_cast<uint16_t*>(dst_pixels_uv_opt);

  // Reference output.
  MergeUVRow_16_C(src_u16, src_v16, dst_uv16_c, kDepth, kPixels);

  int has_avx2 = TestCpuFlag(kCpuHasAVX2);
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    if (has_avx2) {
      MergeUVRow_16_AVX2(src_u16, src_v16, dst_uv16_opt, kDepth, kPixels);
    } else {
      MergeUVRow_16_C(src_u16, src_v16, dst_uv16_opt, kDepth, kPixels);
    }
  }

  for (int i = 0; i < kPixels * 2 * 2; ++i) {
    EXPECT_EQ(dst_pixels_uv_opt[i], dst_pixels_uv_c[i]);
  }

  free_aligned_buffer_page_end(src_pixels_u);
  free_aligned_buffer_page_end(src_pixels_v);
  free_aligned_buffer_page_end(dst_pixels_uv_opt);
  free_aligned_buffer_page_end(dst_pixels_uv_c);
}
3539 #endif
3540 
3541 // TODO(fbarchard): Improve test for more platforms.
3542 #ifdef HAS_MULTIPLYROW_16_AVX2
// Compares MultiplyRow_16_AVX2 against MultiplyRow_16_C on random input. When
// TestCpuFlag(kCpuHasAVX2) reports no AVX2, the C row function is used for
// the timed loop as well.
TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) {
  // Pixel count rounded up to the next multiple of 32.
  const int kPixels = (benchmark_width_ * benchmark_height_ + 31) & ~31;
  const int kPlaneBytes = kPixels * 2;  // 16-bit samples
  // NOTE(review): third row-function argument; presumably a multiplier -
  // confirm against row.h.
  const int kScale = 64;

  align_buffer_page_end(src_pixels_y, kPlaneBytes);
  align_buffer_page_end(dst_pixels_y_opt, kPlaneBytes);
  align_buffer_page_end(dst_pixels_y_c, kPlaneBytes);

  MemRandomize(src_pixels_y, kPlaneBytes);
  // The destinations start with different fill bytes (0 and 1).
  memset(dst_pixels_y_opt, 0, kPlaneBytes);
  memset(dst_pixels_y_c, 1, kPlaneBytes);

  // 16-bit views of the byte buffers.
  const uint16_t* src_y16 = reinterpret_cast<const uint16_t*>(src_pixels_y);
  uint16_t* dst_y16_c = reinterpret_cast<uint16_t*>(dst_pixels_y_c);
  uint16_t* dst_y16_opt = reinterpret_cast<uint16_t*>(dst_pixels_y_opt);

  // Reference output.
  MultiplyRow_16_C(src_y16, dst_y16_c, kScale, kPixels);

  int has_avx2 = TestCpuFlag(kCpuHasAVX2);
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    if (has_avx2) {
      MultiplyRow_16_AVX2(src_y16, dst_y16_opt, kScale, kPixels);
    } else {
      MultiplyRow_16_C(src_y16, dst_y16_opt, kScale, kPixels);
    }
  }

  for (int i = 0; i < kPixels * 2; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(src_pixels_y);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
3579 #endif  // HAS_MULTIPLYROW_16_AVX2
3580 
// Runs Convert16To8Plane once under disable_cpu_flags_ and
// benchmark_iterations_ times under benchmark_cpu_info_ on the same random
// 16-bit input, then requires the two 8-bit outputs to be identical.
TEST_F(LibYUVPlanarTest, Convert16To8Plane) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kSrcBytes = kPixels * 2;  // 16-bit source samples
  // NOTE(review): scale argument passed to Convert16To8Plane; its meaning is
  // not visible in this file - confirm against planar_functions.h.
  const int kScale = 16384;
  align_buffer_page_end(src_pixels_y, kSrcBytes);
  align_buffer_page_end(dst_pixels_y_opt, kPixels);
  align_buffer_page_end(dst_pixels_y_c, kPixels);

  MemRandomize(src_pixels_y, kSrcBytes);
  // The destinations start with different fill bytes (0 and 1).
  memset(dst_pixels_y_opt, 0, kPixels);
  memset(dst_pixels_y_c, 1, kPixels);

  const uint16_t* src_y16 = reinterpret_cast<const uint16_t*>(src_pixels_y);

  // Reference output.
  MaskCpuFlags(disable_cpu_flags_);
  Convert16To8Plane(src_y16, benchmark_width_, dst_pixels_y_c,
                    benchmark_width_, kScale, benchmark_width_,
                    benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  // Output under test, repeated for timing.
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    Convert16To8Plane(src_y16, benchmark_width_, dst_pixels_y_opt,
                      benchmark_width_, kScale, benchmark_width_,
                      benchmark_height_);
  }

  for (int i = 0; i < kPixels; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(src_pixels_y);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
3611 
// Runs YUY2ToY on a random source of benchmark_width_ * 2 bytes per row, once
// with MaskCpuFlags(disable_cpu_flags_) in effect and benchmark_iterations_
// times with MaskCpuFlags(benchmark_cpu_info_) in effect, and expects the two
// Y outputs to match byte for byte.
TEST_F(LibYUVPlanarTest, YUY2ToY) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kPixels = kWidth * kHeight;
  const int kSrcStride = kWidth * 2;
  const int kSrcBytes = kPixels * 2;

  align_buffer_page_end(src_pixels_y, kSrcBytes);
  align_buffer_page_end(dst_pixels_y_opt, kPixels);
  align_buffer_page_end(dst_pixels_y_c, kPixels);

  MemRandomize(src_pixels_y, kSrcBytes);
  // Different fill bytes, so a destination that is never written shows up as
  // a mismatch.
  memset(dst_pixels_y_opt, 0, kPixels);
  memset(dst_pixels_y_c, 1, kPixels);

  // One pass with the disable_cpu_flags_ mask applied.
  MaskCpuFlags(disable_cpu_flags_);
  YUY2ToY(src_pixels_y, kSrcStride, dst_pixels_y_c, kWidth, kWidth, kHeight);
  MaskCpuFlags(benchmark_cpu_info_);

  // Repeated passes with the benchmark CPU mask applied.
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    YUY2ToY(src_pixels_y, kSrcStride, dst_pixels_y_opt, kWidth, kWidth,
            kHeight);
  }

  for (int p = 0; p < kPixels; ++p) {
    EXPECT_EQ(dst_pixels_y_opt[p], dst_pixels_y_c[p]);
  }

  free_aligned_buffer_page_end(dst_pixels_y_c);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(src_pixels_y);
}
3640 
// Runs UYVYToY on a random source of benchmark_width_ * 2 bytes per row, once
// with MaskCpuFlags(disable_cpu_flags_) in effect and benchmark_iterations_
// times with MaskCpuFlags(benchmark_cpu_info_) in effect, and expects the two
// Y outputs to match byte for byte.
TEST_F(LibYUVPlanarTest, UYVYToY) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kPixels = kWidth * kHeight;
  const int kSrcStride = kWidth * 2;
  const int kSrcBytes = kPixels * 2;

  align_buffer_page_end(src_pixels_y, kSrcBytes);
  align_buffer_page_end(dst_pixels_y_opt, kPixels);
  align_buffer_page_end(dst_pixels_y_c, kPixels);

  MemRandomize(src_pixels_y, kSrcBytes);
  // Different fill bytes, so a destination that is never written shows up as
  // a mismatch.
  memset(dst_pixels_y_opt, 0, kPixels);
  memset(dst_pixels_y_c, 1, kPixels);

  // One pass with the disable_cpu_flags_ mask applied.
  MaskCpuFlags(disable_cpu_flags_);
  UYVYToY(src_pixels_y, kSrcStride, dst_pixels_y_c, kWidth, kWidth, kHeight);
  MaskCpuFlags(benchmark_cpu_info_);

  // Repeated passes with the benchmark CPU mask applied.
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    UYVYToY(src_pixels_y, kSrcStride, dst_pixels_y_opt, kWidth, kWidth,
            kHeight);
  }

  for (int p = 0; p < kPixels; ++p) {
    EXPECT_EQ(dst_pixels_y_opt[p], dst_pixels_y_c[p]);
  }

  free_aligned_buffer_page_end(dst_pixels_y_c);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(src_pixels_y);
}
3669 
3670 #ifdef ENABLE_ROW_TESTS
3671 // TODO(fbarchard): Improve test for more platforms.
3672 #ifdef HAS_CONVERT16TO8ROW_AVX2
// Compares Convert16To8Row_C against the row function picked from the CPU
// flags (AVX2 first, then SSSE3, otherwise C again) on random source samples
// masked to their low 10 bits, with scale 16384.
TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) {
  // AVX2 does multiple of 32, so round count up
  const int kPixels = (benchmark_width_ * benchmark_height_ + 31) & ~31;
  const int kSrcBytes = kPixels * 2;  // Two bytes per 16 bit source sample.
  const int kScale = 16384;

  align_buffer_page_end(src_pixels_y, kSrcBytes);
  align_buffer_page_end(dst_pixels_y_opt, kPixels);
  align_buffer_page_end(dst_pixels_y_c, kPixels);

  MemRandomize(src_pixels_y, kSrcBytes);

  // Keep only the low 10 bits of every source sample.
  uint16_t* src_y16_rw = reinterpret_cast<uint16_t*>(src_pixels_y);
  for (int p = 0; p < kPixels; ++p) {
    src_y16_rw[p] &= 1023;
  }
  const uint16_t* src_y16 = reinterpret_cast<const uint16_t*>(src_pixels_y);

  // Different fill bytes, so a destination that is never written shows up as
  // a mismatch.
  memset(dst_pixels_y_opt, 0, kPixels);
  memset(dst_pixels_y_c, 1, kPixels);

  // Reference output from the C row function.
  Convert16To8Row_C(src_y16, dst_pixels_y_c, kScale, kPixels);

  const int has_avx2 = TestCpuFlag(kCpuHasAVX2);
  const int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    if (has_avx2) {
      Convert16To8Row_AVX2(src_y16, dst_pixels_y_opt, kScale, kPixels);
    } else if (has_ssse3) {
      Convert16To8Row_SSSE3(src_y16, dst_pixels_y_opt, kScale, kPixels);
    } else {
      Convert16To8Row_C(src_y16, dst_pixels_y_opt, kScale, kPixels);
    }
  }

  for (int p = 0; p < kPixels; ++p) {
    EXPECT_EQ(dst_pixels_y_opt[p], dst_pixels_y_c[p]);
  }

  free_aligned_buffer_page_end(dst_pixels_y_c);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(src_pixels_y);
}
3715 #endif  // HAS_CONVERT16TO8ROW_AVX2
3716 
3717 #ifdef HAS_UYVYTOYROW_NEON
// Compares UYVYToYRow_NEON against UYVYToYRow_C on a random source row and
// expects byte-identical Y output.
TEST_F(LibYUVPlanarTest, UYVYToYRow_Opt) {
  // NEON does multiple of 16, so round count up
  const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
  const int kSrcBytes = kPixels * 2;

  align_buffer_page_end(src_pixels_y, kSrcBytes);
  align_buffer_page_end(dst_pixels_y_opt, kPixels);
  align_buffer_page_end(dst_pixels_y_c, kPixels);

  MemRandomize(src_pixels_y, kSrcBytes);
  // Different fill bytes, so a destination that is never written shows up as
  // a mismatch.
  memset(dst_pixels_y_opt, 0, kPixels);
  memset(dst_pixels_y_c, 1, kPixels);

  // Reference output from the C row function.
  UYVYToYRow_C(src_pixels_y, dst_pixels_y_c, kPixels);

  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    UYVYToYRow_NEON(src_pixels_y, dst_pixels_y_opt, kPixels);
  }

  for (int p = 0; p < kPixels; ++p) {
    EXPECT_EQ(dst_pixels_y_opt[p], dst_pixels_y_c[p]);
  }

  free_aligned_buffer_page_end(dst_pixels_y_c);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(src_pixels_y);
}
3743 #endif  // HAS_UYVYTOYROW_NEON
3744 
3745 #endif  // ENABLE_ROW_TESTS
3746 
// Runs Convert8To16Plane on a random 8 bit plane of
// benchmark_width_ x benchmark_height_ samples (scale 1024), once with
// MaskCpuFlags(disable_cpu_flags_) in effect and benchmark_iterations_ times
// with MaskCpuFlags(benchmark_cpu_info_) in effect, and expects the two 16 bit
// outputs to match byte for byte.
TEST_F(LibYUVPlanarTest, Convert8To16Plane) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kPixels = kWidth * kHeight;
  const int kDstBytes = kPixels * 2;  // Two bytes per 16 bit output sample.
  const int kScale = 1024;

  align_buffer_page_end(src_pixels_y, kPixels);
  align_buffer_page_end(dst_pixels_y_opt, kDstBytes);
  align_buffer_page_end(dst_pixels_y_c, kDstBytes);

  MemRandomize(src_pixels_y, kPixels);
  // Different fill bytes, so a destination that is never written shows up as
  // a mismatch.
  memset(dst_pixels_y_opt, 0, kDstBytes);
  memset(dst_pixels_y_c, 1, kDstBytes);

  uint16_t* dst_y16_c = reinterpret_cast<uint16_t*>(dst_pixels_y_c);
  uint16_t* dst_y16_opt = reinterpret_cast<uint16_t*>(dst_pixels_y_opt);

  // One pass with the disable_cpu_flags_ mask applied.
  MaskCpuFlags(disable_cpu_flags_);
  Convert8To16Plane(src_pixels_y, kWidth, dst_y16_c, kWidth, kScale, kWidth,
                    kHeight);
  MaskCpuFlags(benchmark_cpu_info_);

  // Repeated passes with the benchmark CPU mask applied.
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    Convert8To16Plane(src_pixels_y, kWidth, dst_y16_opt, kWidth, kScale,
                      kWidth, kHeight);
  }

  // The outputs are compared as raw bytes, two per 16 bit sample.
  for (int b = 0; b < kDstBytes; ++b) {
    EXPECT_EQ(dst_pixels_y_opt[b], dst_pixels_y_c[b]);
  }

  free_aligned_buffer_page_end(dst_pixels_y_c);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(src_pixels_y);
}
3779 
3780 #ifdef ENABLE_ROW_TESTS
3781 // TODO(fbarchard): Improve test for more platforms.
3782 #ifdef HAS_CONVERT8TO16ROW_AVX2
// Compares Convert8To16Row_C against the row function picked from the CPU
// flags (AVX2 first, then SSE2, otherwise C again) on a random 8 bit source
// row with scale 1024. The pixel count is rounded up to a multiple of 32.
TEST_F(LibYUVPlanarTest, Convert8To16Row_Opt) {
  const int kPixels = (benchmark_width_ * benchmark_height_ + 31) & ~31;
  const int kDstBytes = kPixels * 2;  // Two bytes per 16 bit output sample.
  const int kScale = 1024;

  align_buffer_page_end(src_pixels_y, kPixels);
  align_buffer_page_end(dst_pixels_y_opt, kDstBytes);
  align_buffer_page_end(dst_pixels_y_c, kDstBytes);

  MemRandomize(src_pixels_y, kPixels);
  // Different fill bytes, so a destination that is never written shows up as
  // a mismatch.
  memset(dst_pixels_y_opt, 0, kDstBytes);
  memset(dst_pixels_y_c, 1, kDstBytes);

  uint16_t* dst_y16_c = reinterpret_cast<uint16_t*>(dst_pixels_y_c);
  uint16_t* dst_y16_opt = reinterpret_cast<uint16_t*>(dst_pixels_y_opt);

  // Reference output from the C row function.
  Convert8To16Row_C(src_pixels_y, dst_y16_c, kScale, kPixels);

  const int has_avx2 = TestCpuFlag(kCpuHasAVX2);
  const int has_sse2 = TestCpuFlag(kCpuHasSSE2);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    if (has_avx2) {
      Convert8To16Row_AVX2(src_pixels_y, dst_y16_opt, kScale, kPixels);
    } else if (has_sse2) {
      Convert8To16Row_SSE2(src_pixels_y, dst_y16_opt, kScale, kPixels);
    } else {
      Convert8To16Row_C(src_pixels_y, dst_y16_opt, kScale, kPixels);
    }
  }

  // The outputs are compared as raw bytes, two per 16 bit sample.
  for (int b = 0; b < kDstBytes; ++b) {
    EXPECT_EQ(dst_pixels_y_opt[b], dst_pixels_y_c[b]);
  }

  free_aligned_buffer_page_end(dst_pixels_y_c);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(src_pixels_y);
}
3822 #endif  // HAS_CONVERT8TO16ROW_AVX2
3823 
TestScaleMaxSamples(int benchmark_width,int benchmark_height,int benchmark_iterations,float scale,bool opt)3824 float TestScaleMaxSamples(int benchmark_width,
3825                           int benchmark_height,
3826                           int benchmark_iterations,
3827                           float scale,
3828                           bool opt) {
3829   int i, j;
3830   float max_c, max_opt = 0.f;
3831   // NEON does multiple of 8, so round count up
3832   const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
3833   align_buffer_page_end(orig_y, kPixels * 4 * 3 + 48);
3834   uint8_t* dst_c = orig_y + kPixels * 4 + 16;
3835   uint8_t* dst_opt = orig_y + kPixels * 4 * 2 + 32;
3836 
3837   // Randomize works but may contain some denormals affecting performance.
3838   // MemRandomize(orig_y, kPixels * 4);
3839   // large values are problematic.  audio is really -1 to 1.
3840   for (i = 0; i < kPixels; ++i) {
3841     (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
3842   }
3843   memset(dst_c, 0, kPixels * 4);
3844   memset(dst_opt, 1, kPixels * 4);
3845 
3846   max_c = ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y),
3847                             reinterpret_cast<float*>(dst_c), scale, kPixels);
3848 
3849   for (j = 0; j < benchmark_iterations; j++) {
3850     if (opt) {
3851 #ifdef HAS_SCALESUMSAMPLES_NEON
3852       max_opt = ScaleMaxSamples_NEON(reinterpret_cast<float*>(orig_y),
3853                                      reinterpret_cast<float*>(dst_opt), scale,
3854                                      kPixels);
3855 #else
3856       max_opt =
3857           ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y),
3858                             reinterpret_cast<float*>(dst_opt), scale, kPixels);
3859 #endif
3860     } else {
3861       max_opt =
3862           ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y),
3863                             reinterpret_cast<float*>(dst_opt), scale, kPixels);
3864     }
3865   }
3866 
3867   float max_diff = FAbs(max_opt - max_c);
3868   for (i = 0; i < kPixels; ++i) {
3869     float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
3870                           (reinterpret_cast<float*>(dst_opt)[i]));
3871     if (abs_diff > max_diff) {
3872       max_diff = abs_diff;
3873     }
3874   }
3875 
3876   free_aligned_buffer_page_end(orig_y);
3877   return max_diff;
3878 }
3879 
// Expects TestScaleMaxSamples to report no difference with opt = false and a
// scale of 1.2.
TEST_F(LibYUVPlanarTest, TestScaleMaxSamples_C) {
  const float kScale = 1.2f;
  const float max_diff =
      TestScaleMaxSamples(benchmark_width_, benchmark_height_,
                          benchmark_iterations_, kScale, /*opt=*/false);
  EXPECT_EQ(0, max_diff);
}
3885 
// Expects TestScaleMaxSamples to report no difference with opt = true and a
// scale of 1.2.
TEST_F(LibYUVPlanarTest, TestScaleMaxSamples_Opt) {
  const float kScale = 1.2f;
  const float max_diff =
      TestScaleMaxSamples(benchmark_width_, benchmark_height_,
                          benchmark_iterations_, kScale, /*opt=*/true);
  EXPECT_EQ(0, max_diff);
}
3891 
TestScaleSumSamples(int benchmark_width,int benchmark_height,int benchmark_iterations,float scale,bool opt)3892 float TestScaleSumSamples(int benchmark_width,
3893                           int benchmark_height,
3894                           int benchmark_iterations,
3895                           float scale,
3896                           bool opt) {
3897   int i, j;
3898   float sum_c, sum_opt = 0.f;
3899   // NEON does multiple of 8, so round count up
3900   const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
3901   align_buffer_page_end(orig_y, kPixels * 4 * 3);
3902   uint8_t* dst_c = orig_y + kPixels * 4;
3903   uint8_t* dst_opt = orig_y + kPixels * 4 * 2;
3904 
3905   // Randomize works but may contain some denormals affecting performance.
3906   // MemRandomize(orig_y, kPixels * 4);
3907   // large values are problematic.  audio is really -1 to 1.
3908   for (i = 0; i < kPixels; ++i) {
3909     (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
3910   }
3911   memset(dst_c, 0, kPixels * 4);
3912   memset(dst_opt, 1, kPixels * 4);
3913 
3914   sum_c = ScaleSumSamples_C(reinterpret_cast<float*>(orig_y),
3915                             reinterpret_cast<float*>(dst_c), scale, kPixels);
3916 
3917   for (j = 0; j < benchmark_iterations; j++) {
3918     if (opt) {
3919 #ifdef HAS_SCALESUMSAMPLES_NEON
3920       sum_opt = ScaleSumSamples_NEON(reinterpret_cast<float*>(orig_y),
3921                                      reinterpret_cast<float*>(dst_opt), scale,
3922                                      kPixels);
3923 #else
3924       sum_opt =
3925           ScaleSumSamples_C(reinterpret_cast<float*>(orig_y),
3926                             reinterpret_cast<float*>(dst_opt), scale, kPixels);
3927 #endif
3928     } else {
3929       sum_opt =
3930           ScaleSumSamples_C(reinterpret_cast<float*>(orig_y),
3931                             reinterpret_cast<float*>(dst_opt), scale, kPixels);
3932     }
3933   }
3934 
3935   float mse_opt = sum_opt / kPixels * 4;
3936   float mse_c = sum_c / kPixels * 4;
3937   float mse_error = FAbs(mse_opt - mse_c) / mse_c;
3938 
3939   // If the sum of a float is more than 4 million, small adds are round down on
3940   // float and produce different results with vectorized sum vs scalar sum.
3941   // Ignore the difference if the sum is large.
3942   float max_diff = 0.f;
3943   if (mse_error > 0.0001 && sum_c < 4000000) {  // allow .01% difference of mse
3944     max_diff = mse_error;
3945   }
3946 
3947   for (i = 0; i < kPixels; ++i) {
3948     float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
3949                           (reinterpret_cast<float*>(dst_opt)[i]));
3950     if (abs_diff > max_diff) {
3951       max_diff = abs_diff;
3952     }
3953   }
3954 
3955   free_aligned_buffer_page_end(orig_y);
3956   return max_diff;
3957 }
3958 
// Expects TestScaleSumSamples to report no difference with opt = false and a
// scale of 1.2.
TEST_F(LibYUVPlanarTest, TestScaleSumSamples_C) {
  const float kScale = 1.2f;
  const float max_diff =
      TestScaleSumSamples(benchmark_width_, benchmark_height_,
                          benchmark_iterations_, kScale, /*opt=*/false);
  EXPECT_EQ(0, max_diff);
}
3964 
// Expects TestScaleSumSamples to report no difference with opt = true and a
// scale of 1.2.
TEST_F(LibYUVPlanarTest, TestScaleSumSamples_Opt) {
  const float kScale = 1.2f;
  const float max_diff =
      TestScaleSumSamples(benchmark_width_, benchmark_height_,
                          benchmark_iterations_, kScale, /*opt=*/true);
  EXPECT_EQ(0, max_diff);
}
3970 
TestScaleSamples(int benchmark_width,int benchmark_height,int benchmark_iterations,float scale,bool opt)3971 float TestScaleSamples(int benchmark_width,
3972                        int benchmark_height,
3973                        int benchmark_iterations,
3974                        float scale,
3975                        bool opt) {
3976   int i, j;
3977   // NEON does multiple of 8, so round count up
3978   const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
3979   align_buffer_page_end(orig_y, kPixels * 4 * 3);
3980   uint8_t* dst_c = orig_y + kPixels * 4;
3981   uint8_t* dst_opt = orig_y + kPixels * 4 * 2;
3982 
3983   // Randomize works but may contain some denormals affecting performance.
3984   // MemRandomize(orig_y, kPixels * 4);
3985   // large values are problematic.  audio is really -1 to 1.
3986   for (i = 0; i < kPixels; ++i) {
3987     (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
3988   }
3989   memset(dst_c, 0, kPixels * 4);
3990   memset(dst_opt, 1, kPixels * 4);
3991 
3992   ScaleSamples_C(reinterpret_cast<float*>(orig_y),
3993                  reinterpret_cast<float*>(dst_c), scale, kPixels);
3994 
3995   for (j = 0; j < benchmark_iterations; j++) {
3996     if (opt) {
3997 #ifdef HAS_SCALESUMSAMPLES_NEON
3998       ScaleSamples_NEON(reinterpret_cast<float*>(orig_y),
3999                         reinterpret_cast<float*>(dst_opt), scale, kPixels);
4000 #else
4001       ScaleSamples_C(reinterpret_cast<float*>(orig_y),
4002                      reinterpret_cast<float*>(dst_opt), scale, kPixels);
4003 #endif
4004     } else {
4005       ScaleSamples_C(reinterpret_cast<float*>(orig_y),
4006                      reinterpret_cast<float*>(dst_opt), scale, kPixels);
4007     }
4008   }
4009 
4010   float max_diff = 0.f;
4011   for (i = 0; i < kPixels; ++i) {
4012     float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
4013                           (reinterpret_cast<float*>(dst_opt)[i]));
4014     if (abs_diff > max_diff) {
4015       max_diff = abs_diff;
4016     }
4017   }
4018 
4019   free_aligned_buffer_page_end(orig_y);
4020   return max_diff;
4021 }
4022 
// Expects TestScaleSamples to report no difference with opt = false and a
// scale of 1.2.
TEST_F(LibYUVPlanarTest, TestScaleSamples_C) {
  const float kScale = 1.2f;
  const float max_diff =
      TestScaleSamples(benchmark_width_, benchmark_height_,
                       benchmark_iterations_, kScale, /*opt=*/false);
  EXPECT_EQ(0, max_diff);
}
4028 
// Expects TestScaleSamples to report no difference with opt = true and a
// scale of 1.2.
TEST_F(LibYUVPlanarTest, TestScaleSamples_Opt) {
  const float kScale = 1.2f;
  const float max_diff =
      TestScaleSamples(benchmark_width_, benchmark_height_,
                       benchmark_iterations_, kScale, /*opt=*/true);
  EXPECT_EQ(0, max_diff);
}
4034 
TestCopySamples(int benchmark_width,int benchmark_height,int benchmark_iterations,bool opt)4035 float TestCopySamples(int benchmark_width,
4036                       int benchmark_height,
4037                       int benchmark_iterations,
4038                       bool opt) {
4039   int i, j;
4040   // NEON does multiple of 16 floats, so round count up
4041   const int kPixels = (benchmark_width * benchmark_height + 15) & ~15;
4042   align_buffer_page_end(orig_y, kPixels * 4 * 3);
4043   uint8_t* dst_c = orig_y + kPixels * 4;
4044   uint8_t* dst_opt = orig_y + kPixels * 4 * 2;
4045 
4046   // Randomize works but may contain some denormals affecting performance.
4047   // MemRandomize(orig_y, kPixels * 4);
4048   // large values are problematic.  audio is really -1 to 1.
4049   for (i = 0; i < kPixels; ++i) {
4050     (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
4051   }
4052   memset(dst_c, 0, kPixels * 4);
4053   memset(dst_opt, 1, kPixels * 4);
4054 
4055   memcpy(reinterpret_cast<void*>(dst_c), reinterpret_cast<void*>(orig_y),
4056          kPixels * 4);
4057 
4058   for (j = 0; j < benchmark_iterations; j++) {
4059     if (opt) {
4060 #ifdef HAS_COPYROW_NEON
4061       CopyRow_NEON(orig_y, dst_opt, kPixels * 4);
4062 #else
4063       CopyRow_C(orig_y, dst_opt, kPixels * 4);
4064 #endif
4065     } else {
4066       CopyRow_C(orig_y, dst_opt, kPixels * 4);
4067     }
4068   }
4069 
4070   float max_diff = 0.f;
4071   for (i = 0; i < kPixels; ++i) {
4072     float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
4073                           (reinterpret_cast<float*>(dst_opt)[i]));
4074     if (abs_diff > max_diff) {
4075       max_diff = abs_diff;
4076     }
4077   }
4078 
4079   free_aligned_buffer_page_end(orig_y);
4080   return max_diff;
4081 }
4082 
// Expects TestCopySamples to report no difference with opt = false.
TEST_F(LibYUVPlanarTest, TestCopySamples_C) {
  const float max_diff =
      TestCopySamples(benchmark_width_, benchmark_height_,
                      benchmark_iterations_, /*opt=*/false);
  EXPECT_EQ(0, max_diff);
}
4088 
// Expects TestCopySamples to report no difference with opt = true.
TEST_F(LibYUVPlanarTest, TestCopySamples_Opt) {
  const float max_diff =
      TestCopySamples(benchmark_width_, benchmark_height_,
                      benchmark_iterations_, /*opt=*/true);
  EXPECT_EQ(0, max_diff);
}
4094 
4095 extern "C" void GaussRow_NEON(const uint32_t* src, uint16_t* dst, int width);
4096 extern "C" void GaussRow_C(const uint32_t* src, uint16_t* dst, int width);
4097 
// Compares GaussRow_C with GaussRow_NEON (when NEON is compiled in and
// reported by TestCpuFlag, otherwise GaussRow_C again) on a 1280 wide row
// whose source values are i * 256, and checks two known outputs of the C
// version.
TEST_F(LibYUVPlanarTest, TestGaussRow_Opt) {
  const int kWidth = 1280;
  SIMD_ALIGNED(uint32_t orig_pixels[kWidth + 8]);
  SIMD_ALIGNED(uint16_t dst_pixels_c[kWidth]);
  SIMD_ALIGNED(uint16_t dst_pixels_opt[kWidth]);

  memset(orig_pixels, 0, sizeof(orig_pixels));
  // Different fill bytes, so a destination that is never written shows up as
  // a mismatch.
  memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
  memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));

  for (int x = 0; x < kWidth + 8; ++x) {
    orig_pixels[x] = x * 256;
  }

  // Reference output from the C row function.
  GaussRow_C(orig_pixels, dst_pixels_c, kWidth);

  for (int iter = 0; iter < benchmark_pixels_div1280_; ++iter) {
#if !defined(LIBYUV_DISABLE_NEON) && \
    (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
    if (TestCpuFlag(kCpuHasNEON)) {
      GaussRow_NEON(orig_pixels, dst_pixels_opt, kWidth);
    } else {
      GaussRow_C(orig_pixels, dst_pixels_opt, kWidth);
    }
#else
    GaussRow_C(orig_pixels, dst_pixels_opt, kWidth);
#endif
  }

  for (int x = 0; x < kWidth; ++x) {
    EXPECT_EQ(dst_pixels_c[x], dst_pixels_opt[x]);
  }

  // Expected first output written as a 1-4-6-4-1 weighted sum of 0..4.
  EXPECT_EQ(dst_pixels_c[0],
            static_cast<uint16_t>(0 * 1 + 1 * 4 + 2 * 6 + 3 * 4 + 4 * 1));
  EXPECT_EQ(dst_pixels_c[639], static_cast<uint16_t>(10256));
}
4133 
4134 extern "C" void GaussCol_NEON(const uint16_t* src0,
4135                               const uint16_t* src1,
4136                               const uint16_t* src2,
4137                               const uint16_t* src3,
4138                               const uint16_t* src4,
4139                               uint32_t* dst,
4140                               int width);
4141 
4142 extern "C" void GaussCol_C(const uint16_t* src0,
4143                            const uint16_t* src1,
4144                            const uint16_t* src2,
4145                            const uint16_t* src3,
4146                            const uint16_t* src4,
4147                            uint32_t* dst,
4148                            int width);
4149 
// Compares GaussCol_C with GaussCol_NEON (when NEON is compiled in and
// reported by TestCpuFlag, otherwise GaussCol_C again) on five consecutive
// 1280 wide rows of uint16_t whose values are their linear index.
TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) {
  SIMD_ALIGNED(uint16_t orig_pixels[1280 * 5]);
  SIMD_ALIGNED(uint32_t dst_pixels_c[1280]);
  SIMD_ALIGNED(uint32_t dst_pixels_opt[1280]);

  memset(orig_pixels, 0, sizeof(orig_pixels));
  // Different fill bytes, so a destination that is never written shows up as
  // a mismatch.
  memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
  memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));

  for (int i = 0; i < 1280 * 5; ++i) {
    // The elements are uint16_t, so cast to the element type directly rather
    // than going through float. 1280 * 5 - 1 fits in 16 bits, so the stored
    // values are unchanged.
    orig_pixels[i] = static_cast<uint16_t>(i);
  }

  // Reference output from the C column function.
  GaussCol_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2],
             &orig_pixels[1280 * 3], &orig_pixels[1280 * 4], &dst_pixels_c[0],
             1280);
  for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
#if !defined(LIBYUV_DISABLE_NEON) && \
    (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
    int has_neon = TestCpuFlag(kCpuHasNEON);
    if (has_neon) {
      GaussCol_NEON(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2],
                    &orig_pixels[1280 * 3], &orig_pixels[1280 * 4],
                    &dst_pixels_opt[0], 1280);
    } else {
      GaussCol_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2],
                 &orig_pixels[1280 * 3], &orig_pixels[1280 * 4],
                 &dst_pixels_opt[0], 1280);
    }
#else
    GaussCol_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2],
               &orig_pixels[1280 * 3], &orig_pixels[1280 * 4],
               &dst_pixels_opt[0], 1280);
#endif
  }

  for (int i = 0; i < 1280; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }
}
4189 
// Compares GaussRow_F32_C with GaussRow_F32_NEON (on aarch64 builds where
// TestCpuFlag reports NEON, otherwise GaussRow_F32_C again) on a 1280 wide
// float row whose values are their index.
TEST_F(LibYUVPlanarTest, TestGaussRow_F32_Opt) {
  const int kWidth = 1280;
  SIMD_ALIGNED(float orig_pixels[kWidth + 4]);
  SIMD_ALIGNED(float dst_pixels_c[kWidth]);
  SIMD_ALIGNED(float dst_pixels_opt[kWidth]);

  memset(orig_pixels, 0, sizeof(orig_pixels));
  // Different fill bytes, so a destination that is never written shows up as
  // a mismatch.
  memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
  memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));

  for (int x = 0; x < kWidth + 4; ++x) {
    orig_pixels[x] = static_cast<float>(x);
  }

  // Reference output from the C row function.
  GaussRow_F32_C(orig_pixels, dst_pixels_c, kWidth);

  for (int iter = 0; iter < benchmark_pixels_div1280_; ++iter) {
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
    if (TestCpuFlag(kCpuHasNEON)) {
      GaussRow_F32_NEON(orig_pixels, dst_pixels_opt, kWidth);
    } else {
      GaussRow_F32_C(orig_pixels, dst_pixels_opt, kWidth);
    }
#else
    GaussRow_F32_C(orig_pixels, dst_pixels_opt, kWidth);
#endif
  }

  for (int x = 0; x < kWidth; ++x) {
    EXPECT_EQ(dst_pixels_c[x], dst_pixels_opt[x]);
  }
}
4220 
// Compares GaussCol_F32_C with GaussCol_F32_NEON (on aarch64 builds where
// TestCpuFlag reports NEON, otherwise GaussCol_F32_C again) on five
// consecutive 1280 wide float rows whose values are their linear index.
TEST_F(LibYUVPlanarTest, TestGaussCol_F32_Opt) {
  const int kWidth = 1280;
  const int kRows = 5;
  const int kSrcBytes = kWidth * kRows * 4;  // 4 bytes per float.

  SIMD_ALIGNED(float dst_pixels_c[kWidth]);
  SIMD_ALIGNED(float dst_pixels_opt[kWidth]);
  align_buffer_page_end(orig_pixels_buf, kSrcBytes);  // 5 rows
  float* orig_pixels = reinterpret_cast<float*>(orig_pixels_buf);

  memset(orig_pixels, 0, kSrcBytes);
  // Different fill bytes, so a destination that is never written shows up as
  // a mismatch.
  memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
  memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));

  for (int n = 0; n < kWidth * kRows; ++n) {
    orig_pixels[n] = static_cast<float>(n);
  }

  // The five input rows, stored back to back.
  const float* row0 = orig_pixels;
  const float* row1 = orig_pixels + kWidth;
  const float* row2 = orig_pixels + kWidth * 2;
  const float* row3 = orig_pixels + kWidth * 3;
  const float* row4 = orig_pixels + kWidth * 4;

  // Reference output from the C column function.
  GaussCol_F32_C(row0, row1, row2, row3, row4, dst_pixels_c, kWidth);

  for (int iter = 0; iter < benchmark_pixels_div1280_; ++iter) {
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
    if (TestCpuFlag(kCpuHasNEON)) {
      GaussCol_F32_NEON(row0, row1, row2, row3, row4, dst_pixels_opt, kWidth);
    } else {
      GaussCol_F32_C(row0, row1, row2, row3, row4, dst_pixels_opt, kWidth);
    }
#else
    GaussCol_F32_C(row0, row1, row2, row3, row4, dst_pixels_opt, kWidth);
#endif
  }

  for (int x = 0; x < kWidth; ++x) {
    EXPECT_EQ(dst_pixels_c[x], dst_pixels_opt[x]);
  }
  free_aligned_buffer_page_end(orig_pixels_buf);
}
4261 
// Runs a SwapUVRow row function over random interleaved byte pairs and checks
// that each output pair is the input pair with its two bytes exchanged.
// Starts from SwapUVRow_C; when HAS_SWAPUVROW_NEON is defined and NEON is
// reported, uses SwapUVRow_NEON for pixel counts aligned to 16 and
// SwapUVRow_Any_NEON otherwise.
TEST_F(LibYUVPlanarTest, SwapUVRow) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kBytes = kPixels * 2;  // Two bytes per pair.
  void (*swap_row)(const uint8_t* src_uv, uint8_t* dst_vu, int width) =
      SwapUVRow_C;

  align_buffer_page_end(src_pixels_vu, kBytes);
  align_buffer_page_end(dst_pixels_uv, kBytes);
  MemRandomize(src_pixels_vu, kBytes);
  memset(dst_pixels_uv, 1, kBytes);

#if defined(HAS_SWAPUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    swap_row = IS_ALIGNED(kPixels, 16) ? SwapUVRow_NEON : SwapUVRow_Any_NEON;
  }
#endif

  for (int iter = 0; iter < benchmark_iterations_; iter++) {
    swap_row(src_pixels_vu, dst_pixels_uv, kPixels);
  }

  for (int p = 0; p < kPixels; ++p) {
    const int first = p * 2 + 0;
    const int second = p * 2 + 1;
    EXPECT_EQ(dst_pixels_uv[first], src_pixels_vu[second]);
    EXPECT_EQ(dst_pixels_uv[second], src_pixels_vu[first]);
  }

  free_aligned_buffer_page_end(dst_pixels_uv);
  free_aligned_buffer_page_end(src_pixels_vu);
}
4292 #endif  // ENABLE_ROW_TESTS
4293 
// Runs GaussPlane_F32 on a float plane of benchmark_width_ x
// benchmark_height_ samples, once with MaskCpuFlags(disable_cpu_flags_) in
// effect and benchmark_iterations_ times with
// MaskCpuFlags(benchmark_cpu_info_) in effect, and expects every output
// sample to agree within 1.0.
TEST_F(LibYUVPlanarTest, TestGaussPlane_F32) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kPixels = kWidth * kHeight;
  const int kSize = kPixels * 4;  // 4 bytes per float sample.

  align_buffer_page_end(orig_pixels, kSize);
  align_buffer_page_end(dst_pixels_opt, kSize);
  align_buffer_page_end(dst_pixels_c, kSize);

  float* src_f32 = reinterpret_cast<float*>(orig_pixels);
  float* dst_f32_c = reinterpret_cast<float*>(dst_pixels_c);
  float* dst_f32_opt = reinterpret_cast<float*>(dst_pixels_opt);

  // Source values repeat every 1024 samples.
  for (int p = 0; p < kPixels; ++p) {
    src_f32[p] = (p & 1023) * 3.14f;
  }
  // Different fill bytes, so a destination that is never written shows up as
  // a mismatch.
  memset(dst_pixels_opt, 1, kSize);
  memset(dst_pixels_c, 2, kSize);

  // One pass with the disable_cpu_flags_ mask applied.
  MaskCpuFlags(disable_cpu_flags_);
  GaussPlane_F32(src_f32, kWidth, dst_f32_c, kWidth, kWidth, kHeight);
  MaskCpuFlags(benchmark_cpu_info_);

  // Repeated passes with the benchmark CPU mask applied.
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    GaussPlane_F32(src_f32, kWidth, dst_f32_opt, kWidth, kWidth, kHeight);
  }

  for (int p = 0; p < kPixels; ++p) {
    EXPECT_NEAR(dst_f32_c[p], dst_f32_opt[p], 1.f) << p;
  }

  free_aligned_buffer_page_end(dst_pixels_c);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(orig_pixels);
}
4326 
// Runs HalfMergeUVPlane on random U and V planes of
// benchmark_width_ x benchmark_height_ bytes, once with
// MaskCpuFlags(disable_cpu_flags_) in effect and benchmark_iterations_ times
// with MaskCpuFlags(benchmark_cpu_info_) in effect, and expects the two
// interleaved outputs to match byte for byte. The output is
// (width + 1) / 2 pairs wide and (height + 1) / 2 rows high.
TEST_F(LibYUVPlanarTest, HalfMergeUVPlane_Opt) {
  const int kSrcBytes = benchmark_width_ * benchmark_height_;
  int dst_width = (benchmark_width_ + 1) / 2;
  int dst_height = (benchmark_height_ + 1) / 2;
  const int kDstStride = dst_width * 2;  // Two bytes per output pair.
  const int kDstBytes = dst_width * 2 * dst_height;

  // The unused tmp_pixels_u / tmp_pixels_v scratch buffers were removed:
  // they were allocated, randomized and freed but never passed to anything.
  align_buffer_page_end(src_pixels_u, kSrcBytes);
  align_buffer_page_end(src_pixels_v, kSrcBytes);
  align_buffer_page_end(dst_pixels_uv_opt, kDstBytes);
  align_buffer_page_end(dst_pixels_uv_c, kDstBytes);

  MemRandomize(src_pixels_u, kSrcBytes);
  MemRandomize(src_pixels_v, kSrcBytes);
  // Destinations start with independent random content.
  MemRandomize(dst_pixels_uv_opt, kDstBytes);
  MemRandomize(dst_pixels_uv_c, kDstBytes);

  // One pass with the disable_cpu_flags_ mask applied.
  MaskCpuFlags(disable_cpu_flags_);
  HalfMergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v,
                   benchmark_width_, dst_pixels_uv_c, kDstStride,
                   benchmark_width_, benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  // Repeated passes with the benchmark CPU mask applied.
  for (int i = 0; i < benchmark_iterations_; ++i) {
    HalfMergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v,
                     benchmark_width_, dst_pixels_uv_opt, kDstStride,
                     benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < kDstBytes; ++i) {
    EXPECT_EQ(dst_pixels_uv_c[i], dst_pixels_uv_opt[i]);
  }

  free_aligned_buffer_page_end(src_pixels_u);
  free_aligned_buffer_page_end(src_pixels_v);
  free_aligned_buffer_page_end(dst_pixels_uv_opt);
  free_aligned_buffer_page_end(dst_pixels_uv_c);
}
4367 
// Runs NV12Copy benchmark_iterations_ times on randomized planes and checks
// that the destination Y and UV buffers end up byte-identical to the sources.
TEST_F(LibYUVPlanarTest, NV12Copy) {
  // Chroma dimensions are half the frame dimensions, rounded up.
  const int uv_width = (benchmark_width_ + 1) >> 1;
  const int uv_height = (benchmark_height_ + 1) >> 1;
  // Byte counts and stride shared by allocation, fill, copy and verification.
  const int y_bytes = benchmark_width_ * benchmark_height_;
  const int uv_stride = uv_width * 2;
  const int uv_bytes = uv_stride * uv_height;

  align_buffer_page_end(src_y, y_bytes);
  align_buffer_page_end(src_uv, uv_bytes);
  align_buffer_page_end(dst_y, y_bytes);
  align_buffer_page_end(dst_uv, uv_bytes);

  // Destinations start out random too, so a copy that does nothing is caught.
  MemRandomize(src_y, y_bytes);
  MemRandomize(src_uv, uv_bytes);
  MemRandomize(dst_y, y_bytes);
  MemRandomize(dst_uv, uv_bytes);

  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    NV12Copy(src_y, benchmark_width_, src_uv, uv_stride, dst_y,
             benchmark_width_, dst_uv, uv_stride, benchmark_width_,
             benchmark_height_);
  }

  // Luma plane must match byte for byte.
  for (int pos = 0; pos < y_bytes; ++pos) {
    EXPECT_EQ(src_y[pos], dst_y[pos]);
  }
  // So must the UV plane.
  for (int pos = 0; pos < uv_bytes; ++pos) {
    EXPECT_EQ(src_uv[pos], dst_uv[pos]);
  }

  free_aligned_buffer_page_end(src_y);
  free_aligned_buffer_page_end(src_uv);
  free_aligned_buffer_page_end(dst_y);
  free_aligned_buffer_page_end(dst_uv);
}
4399 
// Runs NV21Copy benchmark_iterations_ times on randomized planes and checks
// that the destination Y and VU buffers end up byte-identical to the sources.
TEST_F(LibYUVPlanarTest, NV21Copy) {
  // Chroma dimensions are half the frame dimensions, rounded up.
  const int vu_width = (benchmark_width_ + 1) >> 1;
  const int vu_height = (benchmark_height_ + 1) >> 1;
  // Byte counts and stride shared by allocation, fill, copy and verification.
  const int y_bytes = benchmark_width_ * benchmark_height_;
  const int vu_stride = vu_width * 2;
  const int vu_bytes = vu_stride * vu_height;

  align_buffer_page_end(src_y, y_bytes);
  align_buffer_page_end(src_vu, vu_bytes);
  align_buffer_page_end(dst_y, y_bytes);
  align_buffer_page_end(dst_vu, vu_bytes);

  // Destinations start out random too, so a copy that does nothing is caught.
  MemRandomize(src_y, y_bytes);
  MemRandomize(src_vu, vu_bytes);
  MemRandomize(dst_y, y_bytes);
  MemRandomize(dst_vu, vu_bytes);

  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    NV21Copy(src_y, benchmark_width_, src_vu, vu_stride, dst_y,
             benchmark_width_, dst_vu, vu_stride, benchmark_width_,
             benchmark_height_);
  }

  // Luma plane must match byte for byte.
  for (int pos = 0; pos < y_bytes; ++pos) {
    EXPECT_EQ(src_y[pos], dst_y[pos]);
  }
  // So must the VU plane.
  for (int pos = 0; pos < vu_bytes; ++pos) {
    EXPECT_EQ(src_vu[pos], dst_vu[pos]);
  }

  free_aligned_buffer_page_end(src_y);
  free_aligned_buffer_page_end(src_vu);
  free_aligned_buffer_page_end(dst_y);
  free_aligned_buffer_page_end(dst_vu);
}
4431 
4432 }  // namespace libyuv
4433