/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10 
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include "../unit_test/unit_test.h"
#include "libyuv/compare.h"
#include "libyuv/convert.h"
#include "libyuv/convert_argb.h"
#include "libyuv/convert_from.h"
#include "libyuv/convert_from_argb.h"
#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
23 
24 namespace libyuv {
25 
TEST_F(LibYUVPlanarTest,TestAttenuate)26 TEST_F(LibYUVPlanarTest, TestAttenuate) {
27   const int kSize = 1280 * 4;
28   align_buffer_page_end(orig_pixels, kSize);
29   align_buffer_page_end(atten_pixels, kSize);
30   align_buffer_page_end(unatten_pixels, kSize);
31   align_buffer_page_end(atten2_pixels, kSize);
32 
33   // Test unattenuation clamps
34   orig_pixels[0 * 4 + 0] = 200u;
35   orig_pixels[0 * 4 + 1] = 129u;
36   orig_pixels[0 * 4 + 2] = 127u;
37   orig_pixels[0 * 4 + 3] = 128u;
38   // Test unattenuation transparent and opaque are unaffected
39   orig_pixels[1 * 4 + 0] = 16u;
40   orig_pixels[1 * 4 + 1] = 64u;
41   orig_pixels[1 * 4 + 2] = 192u;
42   orig_pixels[1 * 4 + 3] = 0u;
43   orig_pixels[2 * 4 + 0] = 16u;
44   orig_pixels[2 * 4 + 1] = 64u;
45   orig_pixels[2 * 4 + 2] = 192u;
46   orig_pixels[2 * 4 + 3] = 255u;
47   orig_pixels[3 * 4 + 0] = 16u;
48   orig_pixels[3 * 4 + 1] = 64u;
49   orig_pixels[3 * 4 + 2] = 192u;
50   orig_pixels[3 * 4 + 3] = 128u;
51   ARGBUnattenuate(orig_pixels, 0, unatten_pixels, 0, 4, 1);
52   EXPECT_EQ(255u, unatten_pixels[0 * 4 + 0]);
53   EXPECT_EQ(255u, unatten_pixels[0 * 4 + 1]);
54   EXPECT_EQ(254u, unatten_pixels[0 * 4 + 2]);
55   EXPECT_EQ(128u, unatten_pixels[0 * 4 + 3]);
56   EXPECT_EQ(0u, unatten_pixels[1 * 4 + 0]);
57   EXPECT_EQ(0u, unatten_pixels[1 * 4 + 1]);
58   EXPECT_EQ(0u, unatten_pixels[1 * 4 + 2]);
59   EXPECT_EQ(0u, unatten_pixels[1 * 4 + 3]);
60   EXPECT_EQ(16u, unatten_pixels[2 * 4 + 0]);
61   EXPECT_EQ(64u, unatten_pixels[2 * 4 + 1]);
62   EXPECT_EQ(192u, unatten_pixels[2 * 4 + 2]);
63   EXPECT_EQ(255u, unatten_pixels[2 * 4 + 3]);
64   EXPECT_EQ(32u, unatten_pixels[3 * 4 + 0]);
65   EXPECT_EQ(128u, unatten_pixels[3 * 4 + 1]);
66   EXPECT_EQ(255u, unatten_pixels[3 * 4 + 2]);
67   EXPECT_EQ(128u, unatten_pixels[3 * 4 + 3]);
68 
69   for (int i = 0; i < 1280; ++i) {
70     orig_pixels[i * 4 + 0] = i;
71     orig_pixels[i * 4 + 1] = i / 2;
72     orig_pixels[i * 4 + 2] = i / 3;
73     orig_pixels[i * 4 + 3] = i;
74   }
75   ARGBAttenuate(orig_pixels, 0, atten_pixels, 0, 1280, 1);
76   ARGBUnattenuate(atten_pixels, 0, unatten_pixels, 0, 1280, 1);
77   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
78     ARGBAttenuate(unatten_pixels, 0, atten2_pixels, 0, 1280, 1);
79   }
80   for (int i = 0; i < 1280; ++i) {
81     EXPECT_NEAR(atten_pixels[i * 4 + 0], atten2_pixels[i * 4 + 0], 2);
82     EXPECT_NEAR(atten_pixels[i * 4 + 1], atten2_pixels[i * 4 + 1], 2);
83     EXPECT_NEAR(atten_pixels[i * 4 + 2], atten2_pixels[i * 4 + 2], 2);
84     EXPECT_NEAR(atten_pixels[i * 4 + 3], atten2_pixels[i * 4 + 3], 2);
85   }
86   // Make sure transparent, 50% and opaque are fully accurate.
87   EXPECT_EQ(0, atten_pixels[0 * 4 + 0]);
88   EXPECT_EQ(0, atten_pixels[0 * 4 + 1]);
89   EXPECT_EQ(0, atten_pixels[0 * 4 + 2]);
90   EXPECT_EQ(0, atten_pixels[0 * 4 + 3]);
91   EXPECT_EQ(64, atten_pixels[128 * 4 + 0]);
92   EXPECT_EQ(32, atten_pixels[128 * 4 + 1]);
93   EXPECT_EQ(21, atten_pixels[128 * 4 + 2]);
94   EXPECT_EQ(128, atten_pixels[128 * 4 + 3]);
95   EXPECT_NEAR(255, atten_pixels[255 * 4 + 0], 1);
96   EXPECT_NEAR(127, atten_pixels[255 * 4 + 1], 1);
97   EXPECT_NEAR(85, atten_pixels[255 * 4 + 2], 1);
98   EXPECT_EQ(255, atten_pixels[255 * 4 + 3]);
99 
100   free_aligned_buffer_page_end(atten2_pixels);
101   free_aligned_buffer_page_end(unatten_pixels);
102   free_aligned_buffer_page_end(atten_pixels);
103   free_aligned_buffer_page_end(orig_pixels);
104 }
105 
TestAttenuateI(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)106 static int TestAttenuateI(int width,
107                           int height,
108                           int benchmark_iterations,
109                           int disable_cpu_flags,
110                           int benchmark_cpu_info,
111                           int invert,
112                           int off) {
113   if (width < 1) {
114     width = 1;
115   }
116   const int kBpp = 4;
117   const int kStride = width * kBpp;
118   align_buffer_page_end(src_argb, kStride * height + off);
119   align_buffer_page_end(dst_argb_c, kStride * height);
120   align_buffer_page_end(dst_argb_opt, kStride * height);
121   for (int i = 0; i < kStride * height; ++i) {
122     src_argb[i + off] = (fastrand() & 0xff);
123   }
124   memset(dst_argb_c, 0, kStride * height);
125   memset(dst_argb_opt, 0, kStride * height);
126 
127   MaskCpuFlags(disable_cpu_flags);
128   ARGBAttenuate(src_argb + off, kStride, dst_argb_c, kStride, width,
129                 invert * height);
130   MaskCpuFlags(benchmark_cpu_info);
131   for (int i = 0; i < benchmark_iterations; ++i) {
132     ARGBAttenuate(src_argb + off, kStride, dst_argb_opt, kStride, width,
133                   invert * height);
134   }
135   int max_diff = 0;
136   for (int i = 0; i < kStride * height; ++i) {
137     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
138                        static_cast<int>(dst_argb_opt[i]));
139     if (abs_diff > max_diff) {
140       max_diff = abs_diff;
141     }
142   }
143   free_aligned_buffer_page_end(src_argb);
144   free_aligned_buffer_page_end(dst_argb_c);
145   free_aligned_buffer_page_end(dst_argb_opt);
146   return max_diff;
147 }
148 
// ARGBAttenuate with a width of benchmark_width_ - 1, no source offset and
// positive height: the two runs made by TestAttenuateI may differ by at most
// 2 per byte.
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Any) {
  const int max_diff = TestAttenuateI(benchmark_width_ - 1, benchmark_height_,
                                      benchmark_iterations_, disable_cpu_flags_,
                                      benchmark_cpu_info_, +1, 0);
  EXPECT_LE(max_diff, 2);
}
155 
// ARGBAttenuate with the source pointer offset by one byte: the two runs made
// by TestAttenuateI may differ by at most 2 per byte.
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Unaligned) {
  const int max_diff =
      TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_,
                     disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
  EXPECT_LE(max_diff, 2);
}
162 
// ARGBAttenuate called with a negated height (invert = -1): the two runs made
// by TestAttenuateI may differ by at most 2 per byte.
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Invert) {
  const int max_diff =
      TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_,
                     disable_cpu_flags_, benchmark_cpu_info_, -1, 0);
  EXPECT_LE(max_diff, 2);
}
169 
// ARGBAttenuate at the full benchmark size with no offset and positive
// height: the two runs made by TestAttenuateI may differ by at most 2 per
// byte.
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Opt) {
  const int max_diff =
      TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_,
                     disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  EXPECT_LE(max_diff, 2);
}
176 
TestUnattenuateI(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)177 static int TestUnattenuateI(int width,
178                             int height,
179                             int benchmark_iterations,
180                             int disable_cpu_flags,
181                             int benchmark_cpu_info,
182                             int invert,
183                             int off) {
184   if (width < 1) {
185     width = 1;
186   }
187   const int kBpp = 4;
188   const int kStride = width * kBpp;
189   align_buffer_page_end(src_argb, kStride * height + off);
190   align_buffer_page_end(dst_argb_c, kStride * height);
191   align_buffer_page_end(dst_argb_opt, kStride * height);
192   for (int i = 0; i < kStride * height; ++i) {
193     src_argb[i + off] = (fastrand() & 0xff);
194   }
195   ARGBAttenuate(src_argb + off, kStride, src_argb + off, kStride, width,
196                 height);
197   memset(dst_argb_c, 0, kStride * height);
198   memset(dst_argb_opt, 0, kStride * height);
199 
200   MaskCpuFlags(disable_cpu_flags);
201   ARGBUnattenuate(src_argb + off, kStride, dst_argb_c, kStride, width,
202                   invert * height);
203   MaskCpuFlags(benchmark_cpu_info);
204   for (int i = 0; i < benchmark_iterations; ++i) {
205     ARGBUnattenuate(src_argb + off, kStride, dst_argb_opt, kStride, width,
206                     invert * height);
207   }
208   int max_diff = 0;
209   for (int i = 0; i < kStride * height; ++i) {
210     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
211                        static_cast<int>(dst_argb_opt[i]));
212     if (abs_diff > max_diff) {
213       max_diff = abs_diff;
214     }
215   }
216   free_aligned_buffer_page_end(src_argb);
217   free_aligned_buffer_page_end(dst_argb_c);
218   free_aligned_buffer_page_end(dst_argb_opt);
219   return max_diff;
220 }
221 
// ARGBUnattenuate with a width of benchmark_width_ - 1, no source offset and
// positive height: the two runs made by TestUnattenuateI may differ by at
// most 2 per byte.
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Any) {
  const int max_diff = TestUnattenuateI(
      benchmark_width_ - 1, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  EXPECT_LE(max_diff, 2);
}
228 
// ARGBUnattenuate with the source pointer offset by one byte: the two runs
// made by TestUnattenuateI may differ by at most 2 per byte.
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Unaligned) {
  const int max_diff = TestUnattenuateI(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
  EXPECT_LE(max_diff, 2);
}
235 
// ARGBUnattenuate called with a negated height (invert = -1): the two runs
// made by TestUnattenuateI may differ by at most 2 per byte.
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Invert) {
  const int max_diff = TestUnattenuateI(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, -1, 0);
  EXPECT_LE(max_diff, 2);
}
242 
// ARGBUnattenuate at the full benchmark size with no offset and positive
// height: the two runs made by TestUnattenuateI may differ by at most 2 per
// byte.
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Opt) {
  const int max_diff = TestUnattenuateI(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  EXPECT_LE(max_diff, 2);
}
249 
// Feeds a 16x16 image whose every pixel is (1, 2, 3, 255) through
// ARGBComputeCumulativeSum and expects each output channel at (x, y) to equal
// that channel's constant multiplied by (x + 1) * (y + 1).
TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) {
  SIMD_ALIGNED(uint8 orig_pixels[16][16][4]);
  SIMD_ALIGNED(int32 added_pixels[16][16][4]);

  for (int y = 0; y < 16; ++y) {
    for (int x = 0; x < 16; ++x) {
      orig_pixels[y][x][0] = 1u;
      orig_pixels[y][x][1] = 2u;
      orig_pixels[y][x][2] = 3u;
      orig_pixels[y][x][3] = 255u;
    }
  }

  ARGBComputeCumulativeSum(&orig_pixels[0][0][0], 16 * 4,
                           &added_pixels[0][0][0], 16 * 4, 16, 16);

  for (int y = 0; y < 16; ++y) {
    for (int x = 0; x < 16; ++x) {
      // Number of source pixels in the rectangle from (0, 0) to (x, y).
      const int area = (x + 1) * (y + 1);
      EXPECT_EQ(area, added_pixels[y][x][0]);
      EXPECT_EQ(area * 2, added_pixels[y][x][1]);
      EXPECT_EQ(area * 3, added_pixels[y][x][2]);
      EXPECT_EQ(area * 255, added_pixels[y][x][3]);
    }
  }
}
275 
TEST_F(LibYUVPlanarTest,TestARGBGray)276 TEST_F(LibYUVPlanarTest, TestARGBGray) {
277   SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
278   memset(orig_pixels, 0, sizeof(orig_pixels));
279 
280   // Test blue
281   orig_pixels[0][0] = 255u;
282   orig_pixels[0][1] = 0u;
283   orig_pixels[0][2] = 0u;
284   orig_pixels[0][3] = 128u;
285   // Test green
286   orig_pixels[1][0] = 0u;
287   orig_pixels[1][1] = 255u;
288   orig_pixels[1][2] = 0u;
289   orig_pixels[1][3] = 0u;
290   // Test red
291   orig_pixels[2][0] = 0u;
292   orig_pixels[2][1] = 0u;
293   orig_pixels[2][2] = 255u;
294   orig_pixels[2][3] = 255u;
295   // Test black
296   orig_pixels[3][0] = 0u;
297   orig_pixels[3][1] = 0u;
298   orig_pixels[3][2] = 0u;
299   orig_pixels[3][3] = 255u;
300   // Test white
301   orig_pixels[4][0] = 255u;
302   orig_pixels[4][1] = 255u;
303   orig_pixels[4][2] = 255u;
304   orig_pixels[4][3] = 255u;
305   // Test color
306   orig_pixels[5][0] = 16u;
307   orig_pixels[5][1] = 64u;
308   orig_pixels[5][2] = 192u;
309   orig_pixels[5][3] = 224u;
310   // Do 16 to test asm version.
311   ARGBGray(&orig_pixels[0][0], 0, 0, 0, 16, 1);
312   EXPECT_EQ(30u, orig_pixels[0][0]);
313   EXPECT_EQ(30u, orig_pixels[0][1]);
314   EXPECT_EQ(30u, orig_pixels[0][2]);
315   EXPECT_EQ(128u, orig_pixels[0][3]);
316   EXPECT_EQ(149u, orig_pixels[1][0]);
317   EXPECT_EQ(149u, orig_pixels[1][1]);
318   EXPECT_EQ(149u, orig_pixels[1][2]);
319   EXPECT_EQ(0u, orig_pixels[1][3]);
320   EXPECT_EQ(76u, orig_pixels[2][0]);
321   EXPECT_EQ(76u, orig_pixels[2][1]);
322   EXPECT_EQ(76u, orig_pixels[2][2]);
323   EXPECT_EQ(255u, orig_pixels[2][3]);
324   EXPECT_EQ(0u, orig_pixels[3][0]);
325   EXPECT_EQ(0u, orig_pixels[3][1]);
326   EXPECT_EQ(0u, orig_pixels[3][2]);
327   EXPECT_EQ(255u, orig_pixels[3][3]);
328   EXPECT_EQ(255u, orig_pixels[4][0]);
329   EXPECT_EQ(255u, orig_pixels[4][1]);
330   EXPECT_EQ(255u, orig_pixels[4][2]);
331   EXPECT_EQ(255u, orig_pixels[4][3]);
332   EXPECT_EQ(96u, orig_pixels[5][0]);
333   EXPECT_EQ(96u, orig_pixels[5][1]);
334   EXPECT_EQ(96u, orig_pixels[5][2]);
335   EXPECT_EQ(224u, orig_pixels[5][3]);
336   for (int i = 0; i < 1280; ++i) {
337     orig_pixels[i][0] = i;
338     orig_pixels[i][1] = i / 2;
339     orig_pixels[i][2] = i / 3;
340     orig_pixels[i][3] = i;
341   }
342   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
343     ARGBGray(&orig_pixels[0][0], 0, 0, 0, 1280, 1);
344   }
345 }
346 
TEST_F(LibYUVPlanarTest,TestARGBGrayTo)347 TEST_F(LibYUVPlanarTest, TestARGBGrayTo) {
348   SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
349   SIMD_ALIGNED(uint8 gray_pixels[1280][4]);
350   memset(orig_pixels, 0, sizeof(orig_pixels));
351 
352   // Test blue
353   orig_pixels[0][0] = 255u;
354   orig_pixels[0][1] = 0u;
355   orig_pixels[0][2] = 0u;
356   orig_pixels[0][3] = 128u;
357   // Test green
358   orig_pixels[1][0] = 0u;
359   orig_pixels[1][1] = 255u;
360   orig_pixels[1][2] = 0u;
361   orig_pixels[1][3] = 0u;
362   // Test red
363   orig_pixels[2][0] = 0u;
364   orig_pixels[2][1] = 0u;
365   orig_pixels[2][2] = 255u;
366   orig_pixels[2][3] = 255u;
367   // Test black
368   orig_pixels[3][0] = 0u;
369   orig_pixels[3][1] = 0u;
370   orig_pixels[3][2] = 0u;
371   orig_pixels[3][3] = 255u;
372   // Test white
373   orig_pixels[4][0] = 255u;
374   orig_pixels[4][1] = 255u;
375   orig_pixels[4][2] = 255u;
376   orig_pixels[4][3] = 255u;
377   // Test color
378   orig_pixels[5][0] = 16u;
379   orig_pixels[5][1] = 64u;
380   orig_pixels[5][2] = 192u;
381   orig_pixels[5][3] = 224u;
382   // Do 16 to test asm version.
383   ARGBGrayTo(&orig_pixels[0][0], 0, &gray_pixels[0][0], 0, 16, 1);
384   EXPECT_EQ(30u, gray_pixels[0][0]);
385   EXPECT_EQ(30u, gray_pixels[0][1]);
386   EXPECT_EQ(30u, gray_pixels[0][2]);
387   EXPECT_EQ(128u, gray_pixels[0][3]);
388   EXPECT_EQ(149u, gray_pixels[1][0]);
389   EXPECT_EQ(149u, gray_pixels[1][1]);
390   EXPECT_EQ(149u, gray_pixels[1][2]);
391   EXPECT_EQ(0u, gray_pixels[1][3]);
392   EXPECT_EQ(76u, gray_pixels[2][0]);
393   EXPECT_EQ(76u, gray_pixels[2][1]);
394   EXPECT_EQ(76u, gray_pixels[2][2]);
395   EXPECT_EQ(255u, gray_pixels[2][3]);
396   EXPECT_EQ(0u, gray_pixels[3][0]);
397   EXPECT_EQ(0u, gray_pixels[3][1]);
398   EXPECT_EQ(0u, gray_pixels[3][2]);
399   EXPECT_EQ(255u, gray_pixels[3][3]);
400   EXPECT_EQ(255u, gray_pixels[4][0]);
401   EXPECT_EQ(255u, gray_pixels[4][1]);
402   EXPECT_EQ(255u, gray_pixels[4][2]);
403   EXPECT_EQ(255u, gray_pixels[4][3]);
404   EXPECT_EQ(96u, gray_pixels[5][0]);
405   EXPECT_EQ(96u, gray_pixels[5][1]);
406   EXPECT_EQ(96u, gray_pixels[5][2]);
407   EXPECT_EQ(224u, gray_pixels[5][3]);
408   for (int i = 0; i < 1280; ++i) {
409     orig_pixels[i][0] = i;
410     orig_pixels[i][1] = i / 2;
411     orig_pixels[i][2] = i / 3;
412     orig_pixels[i][3] = i;
413   }
414   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
415     ARGBGrayTo(&orig_pixels[0][0], 0, &gray_pixels[0][0], 0, 1280, 1);
416   }
417 }
418 
TEST_F(LibYUVPlanarTest,TestARGBSepia)419 TEST_F(LibYUVPlanarTest, TestARGBSepia) {
420   SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
421   memset(orig_pixels, 0, sizeof(orig_pixels));
422 
423   // Test blue
424   orig_pixels[0][0] = 255u;
425   orig_pixels[0][1] = 0u;
426   orig_pixels[0][2] = 0u;
427   orig_pixels[0][3] = 128u;
428   // Test green
429   orig_pixels[1][0] = 0u;
430   orig_pixels[1][1] = 255u;
431   orig_pixels[1][2] = 0u;
432   orig_pixels[1][3] = 0u;
433   // Test red
434   orig_pixels[2][0] = 0u;
435   orig_pixels[2][1] = 0u;
436   orig_pixels[2][2] = 255u;
437   orig_pixels[2][3] = 255u;
438   // Test black
439   orig_pixels[3][0] = 0u;
440   orig_pixels[3][1] = 0u;
441   orig_pixels[3][2] = 0u;
442   orig_pixels[3][3] = 255u;
443   // Test white
444   orig_pixels[4][0] = 255u;
445   orig_pixels[4][1] = 255u;
446   orig_pixels[4][2] = 255u;
447   orig_pixels[4][3] = 255u;
448   // Test color
449   orig_pixels[5][0] = 16u;
450   orig_pixels[5][1] = 64u;
451   orig_pixels[5][2] = 192u;
452   orig_pixels[5][3] = 224u;
453   // Do 16 to test asm version.
454   ARGBSepia(&orig_pixels[0][0], 0, 0, 0, 16, 1);
455   EXPECT_EQ(33u, orig_pixels[0][0]);
456   EXPECT_EQ(43u, orig_pixels[0][1]);
457   EXPECT_EQ(47u, orig_pixels[0][2]);
458   EXPECT_EQ(128u, orig_pixels[0][3]);
459   EXPECT_EQ(135u, orig_pixels[1][0]);
460   EXPECT_EQ(175u, orig_pixels[1][1]);
461   EXPECT_EQ(195u, orig_pixels[1][2]);
462   EXPECT_EQ(0u, orig_pixels[1][3]);
463   EXPECT_EQ(69u, orig_pixels[2][0]);
464   EXPECT_EQ(89u, orig_pixels[2][1]);
465   EXPECT_EQ(99u, orig_pixels[2][2]);
466   EXPECT_EQ(255u, orig_pixels[2][3]);
467   EXPECT_EQ(0u, orig_pixels[3][0]);
468   EXPECT_EQ(0u, orig_pixels[3][1]);
469   EXPECT_EQ(0u, orig_pixels[3][2]);
470   EXPECT_EQ(255u, orig_pixels[3][3]);
471   EXPECT_EQ(239u, orig_pixels[4][0]);
472   EXPECT_EQ(255u, orig_pixels[4][1]);
473   EXPECT_EQ(255u, orig_pixels[4][2]);
474   EXPECT_EQ(255u, orig_pixels[4][3]);
475   EXPECT_EQ(88u, orig_pixels[5][0]);
476   EXPECT_EQ(114u, orig_pixels[5][1]);
477   EXPECT_EQ(127u, orig_pixels[5][2]);
478   EXPECT_EQ(224u, orig_pixels[5][3]);
479 
480   for (int i = 0; i < 1280; ++i) {
481     orig_pixels[i][0] = i;
482     orig_pixels[i][1] = i / 2;
483     orig_pixels[i][2] = i / 3;
484     orig_pixels[i][3] = i;
485   }
486   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
487     ARGBSepia(&orig_pixels[0][0], 0, 0, 0, 1280, 1);
488   }
489 }
490 
TEST_F(LibYUVPlanarTest,TestARGBColorMatrix)491 TEST_F(LibYUVPlanarTest, TestARGBColorMatrix) {
492   SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
493   SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]);
494   SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]);
495 
496   // Matrix for Sepia.
497   SIMD_ALIGNED(static const int8 kRGBToSepia[]) = {
498       17 / 2, 68 / 2, 35 / 2, 0, 22 / 2, 88 / 2, 45 / 2, 0,
499       24 / 2, 98 / 2, 50 / 2, 0, 0,      0,      0,      64,  // Copy alpha.
500   };
501   memset(orig_pixels, 0, sizeof(orig_pixels));
502 
503   // Test blue
504   orig_pixels[0][0] = 255u;
505   orig_pixels[0][1] = 0u;
506   orig_pixels[0][2] = 0u;
507   orig_pixels[0][3] = 128u;
508   // Test green
509   orig_pixels[1][0] = 0u;
510   orig_pixels[1][1] = 255u;
511   orig_pixels[1][2] = 0u;
512   orig_pixels[1][3] = 0u;
513   // Test red
514   orig_pixels[2][0] = 0u;
515   orig_pixels[2][1] = 0u;
516   orig_pixels[2][2] = 255u;
517   orig_pixels[2][3] = 255u;
518   // Test color
519   orig_pixels[3][0] = 16u;
520   orig_pixels[3][1] = 64u;
521   orig_pixels[3][2] = 192u;
522   orig_pixels[3][3] = 224u;
523   // Do 16 to test asm version.
524   ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
525                   &kRGBToSepia[0], 16, 1);
526   EXPECT_EQ(31u, dst_pixels_opt[0][0]);
527   EXPECT_EQ(43u, dst_pixels_opt[0][1]);
528   EXPECT_EQ(47u, dst_pixels_opt[0][2]);
529   EXPECT_EQ(128u, dst_pixels_opt[0][3]);
530   EXPECT_EQ(135u, dst_pixels_opt[1][0]);
531   EXPECT_EQ(175u, dst_pixels_opt[1][1]);
532   EXPECT_EQ(195u, dst_pixels_opt[1][2]);
533   EXPECT_EQ(0u, dst_pixels_opt[1][3]);
534   EXPECT_EQ(67u, dst_pixels_opt[2][0]);
535   EXPECT_EQ(87u, dst_pixels_opt[2][1]);
536   EXPECT_EQ(99u, dst_pixels_opt[2][2]);
537   EXPECT_EQ(255u, dst_pixels_opt[2][3]);
538   EXPECT_EQ(87u, dst_pixels_opt[3][0]);
539   EXPECT_EQ(112u, dst_pixels_opt[3][1]);
540   EXPECT_EQ(127u, dst_pixels_opt[3][2]);
541   EXPECT_EQ(224u, dst_pixels_opt[3][3]);
542 
543   for (int i = 0; i < 1280; ++i) {
544     orig_pixels[i][0] = i;
545     orig_pixels[i][1] = i / 2;
546     orig_pixels[i][2] = i / 3;
547     orig_pixels[i][3] = i;
548   }
549   MaskCpuFlags(disable_cpu_flags_);
550   ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0,
551                   &kRGBToSepia[0], 1280, 1);
552   MaskCpuFlags(benchmark_cpu_info_);
553 
554   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
555     ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
556                     &kRGBToSepia[0], 1280, 1);
557   }
558 
559   for (int i = 0; i < 1280; ++i) {
560     EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]);
561     EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]);
562     EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]);
563     EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]);
564   }
565 }
566 
// Applies RGBColorMatrix in place with a sepia matrix to four known pixels
// and checks the exact results (the fourth byte of each pixel is expected to
// be unchanged), then runs RGBColorMatrix repeatedly over a 1280-pixel ramp
// (that output is not checked).
TEST_F(LibYUVPlanarTest, TestRGBColorMatrix) {
  SIMD_ALIGNED(uint8 orig_pixels[1280][4]);

  // Matrix for Sepia.
  SIMD_ALIGNED(static const int8 kRGBToSepia[]) = {
      17, 68, 35, 0, 22, 88, 45, 0,
      24, 98, 50, 0, 0,  0,  0,  0,  // Unused but makes matrix 16 bytes.
  };
  memset(orig_pixels, 0, sizeof(orig_pixels));

  // Inputs, 4 bytes per pixel.
  static const uint8 kInput[4][4] = {
      {255u, 0u, 0u, 128u},    // Test blue
      {0u, 255u, 0u, 0u},      // Test green
      {0u, 0u, 255u, 255u},    // Test red
      {16u, 64u, 192u, 224u},  // Test color
  };
  static const uint8 kExpected[4][4] = {
      {31u, 43u, 47u, 128u},
      {135u, 175u, 195u, 0u},
      {67u, 87u, 99u, 255u},
      {87u, 112u, 127u, 224u},
  };
  memcpy(orig_pixels, kInput, sizeof(kInput));

  // Do 16 to test asm version.
  RGBColorMatrix(&orig_pixels[0][0], 0, &kRGBToSepia[0], 0, 0, 16, 1);
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpected[p][c], orig_pixels[p][c])
          << "pixel " << p << " channel " << c;
    }
  }

  // Ramp input for the benchmark loop; the int values wrap when stored.
  for (int i = 0; i < 1280; ++i) {
    orig_pixels[i][0] = i;
    orig_pixels[i][1] = i / 2;
    orig_pixels[i][2] = i / 3;
    orig_pixels[i][3] = i;
  }
  for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
    RGBColorMatrix(&orig_pixels[0][0], 0, &kRGBToSepia[0], 0, 0, 1280, 1);
  }
}
626 
// Applies ARGBColorTable in place to four known pixels and checks the exact
// results, then runs ARGBColorTable repeatedly over a 1280-pixel ramp (that
// output is not checked).
TEST_F(LibYUVPlanarTest, TestARGBColorTable) {
  SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  // Color lookup table with 256 * 4 entries. Only the first 16 are given;
  // the rest are zero-initialized. The expectations below are consistent
  // with output channel c being kARGBTable[input * 4 + c].
  static const uint8 kARGBTable[256 * 4] = {
      1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u, 16u,
  };

  // Inputs, 4 bytes per pixel.
  static const uint8 kInput[4][4] = {
      {0u, 0u, 0u, 0u},
      {1u, 1u, 1u, 1u},
      {2u, 2u, 2u, 2u},
      {0u, 1u, 2u, 3u},
  };
  static const uint8 kExpected[4][4] = {
      {1u, 2u, 3u, 4u},
      {5u, 6u, 7u, 8u},
      {9u, 10u, 11u, 12u},
      {1u, 6u, 11u, 16u},
  };
  memcpy(orig_pixels, kInput, sizeof(kInput));

  // Do 16 to test asm version.
  ARGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 16, 1);
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpected[p][c], orig_pixels[p][c])
          << "pixel " << p << " channel " << c;
    }
  }

  // Ramp input for the benchmark loop; the int values wrap when stored.
  for (int i = 0; i < 1280; ++i) {
    orig_pixels[i][0] = i;
    orig_pixels[i][1] = i / 2;
    orig_pixels[i][2] = i / 3;
    orig_pixels[i][3] = i;
  }
  for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
    ARGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 1280, 1);
  }
}
681 
682 // Same as TestARGBColorTable except alpha does not change.
TEST_F(LibYUVPlanarTest,TestRGBColorTable)683 TEST_F(LibYUVPlanarTest, TestRGBColorTable) {
684   SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
685   memset(orig_pixels, 0, sizeof(orig_pixels));
686 
687   // Matrix for Sepia.
688   static const uint8 kARGBTable[256 * 4] = {
689       1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u, 16u,
690   };
691 
692   orig_pixels[0][0] = 0u;
693   orig_pixels[0][1] = 0u;
694   orig_pixels[0][2] = 0u;
695   orig_pixels[0][3] = 0u;
696   orig_pixels[1][0] = 1u;
697   orig_pixels[1][1] = 1u;
698   orig_pixels[1][2] = 1u;
699   orig_pixels[1][3] = 1u;
700   orig_pixels[2][0] = 2u;
701   orig_pixels[2][1] = 2u;
702   orig_pixels[2][2] = 2u;
703   orig_pixels[2][3] = 2u;
704   orig_pixels[3][0] = 0u;
705   orig_pixels[3][1] = 1u;
706   orig_pixels[3][2] = 2u;
707   orig_pixels[3][3] = 3u;
708   // Do 16 to test asm version.
709   RGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 16, 1);
710   EXPECT_EQ(1u, orig_pixels[0][0]);
711   EXPECT_EQ(2u, orig_pixels[0][1]);
712   EXPECT_EQ(3u, orig_pixels[0][2]);
713   EXPECT_EQ(0u, orig_pixels[0][3]);  // Alpha unchanged.
714   EXPECT_EQ(5u, orig_pixels[1][0]);
715   EXPECT_EQ(6u, orig_pixels[1][1]);
716   EXPECT_EQ(7u, orig_pixels[1][2]);
717   EXPECT_EQ(1u, orig_pixels[1][3]);  // Alpha unchanged.
718   EXPECT_EQ(9u, orig_pixels[2][0]);
719   EXPECT_EQ(10u, orig_pixels[2][1]);
720   EXPECT_EQ(11u, orig_pixels[2][2]);
721   EXPECT_EQ(2u, orig_pixels[2][3]);  // Alpha unchanged.
722   EXPECT_EQ(1u, orig_pixels[3][0]);
723   EXPECT_EQ(6u, orig_pixels[3][1]);
724   EXPECT_EQ(11u, orig_pixels[3][2]);
725   EXPECT_EQ(3u, orig_pixels[3][3]);  // Alpha unchanged.
726 
727   for (int i = 0; i < 1280; ++i) {
728     orig_pixels[i][0] = i;
729     orig_pixels[i][1] = i / 2;
730     orig_pixels[i][2] = i / 3;
731     orig_pixels[i][3] = i;
732   }
733   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
734     RGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 1280, 1);
735   }
736 }
737 
// Quantizes a 1280-pixel ramp in place with ARGBQuantize and expects the
// first three bytes of each pixel to become
// (value / 8 * 8 + 4) & 255 while the fourth byte stays unchanged; then runs
// ARGBQuantize repeatedly on the same buffer (that output is not checked).
TEST_F(LibYUVPlanarTest, TestARGBQuantize) {
  SIMD_ALIGNED(uint8 orig_pixels[1280][4]);

  // Arguments 3-5 of ARGBQuantize. The names follow the expectation formula
  // below; see the libyuv API docs for the authoritative parameter meanings.
  const int kIntervalSize = 8;
  const int kIntervalOffset = kIntervalSize / 2;
  const int kScale = (65536 + (kIntervalSize / 2)) / kIntervalSize;

  // Ramp input; the int values wrap when stored into the byte buffer.
  for (int i = 0; i < 1280; ++i) {
    orig_pixels[i][0] = i;
    orig_pixels[i][1] = i / 2;
    orig_pixels[i][2] = i / 3;
    orig_pixels[i][3] = i;
  }
  ARGBQuantize(&orig_pixels[0][0], 0, kScale, kIntervalSize, kIntervalOffset,
               0, 0, 1280, 1);

  for (int i = 0; i < 1280; ++i) {
    EXPECT_EQ((i / kIntervalSize * kIntervalSize + kIntervalOffset) & 255,
              orig_pixels[i][0]);
    EXPECT_EQ((i / 2 / kIntervalSize * kIntervalSize + kIntervalOffset) & 255,
              orig_pixels[i][1]);
    EXPECT_EQ((i / 3 / kIntervalSize * kIntervalSize + kIntervalOffset) & 255,
              orig_pixels[i][2]);
    EXPECT_EQ(i & 255, orig_pixels[i][3]);
  }
  for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
    ARGBQuantize(&orig_pixels[0][0], 0, kScale, kIntervalSize, kIntervalOffset,
                 0, 0, 1280, 1);
  }
}
761 
// Mirrors a single row of 1280 pixels with ARGBMirror and expects source
// pixel i to land at destination index 1280 - 1 - i with its four bytes in
// the same order; then runs ARGBMirror repeatedly (that output is not
// checked).
TEST_F(LibYUVPlanarTest, TestARGBMirror) {
  SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
  SIMD_ALIGNED(uint8 dst_pixels[1280][4]);

  // Ramp input; the int values wrap when stored into the byte buffer.
  for (int i = 0; i < 1280; ++i) {
    orig_pixels[i][0] = i;
    orig_pixels[i][1] = i / 2;
    orig_pixels[i][2] = i / 3;
    orig_pixels[i][3] = i / 4;
  }
  ARGBMirror(&orig_pixels[0][0], 0, &dst_pixels[0][0], 0, 1280, 1);

  for (int i = 0; i < 1280; ++i) {
    const int mirrored = 1280 - 1 - i;
    EXPECT_EQ(i & 255, dst_pixels[mirrored][0]);
    EXPECT_EQ((i / 2) & 255, dst_pixels[mirrored][1]);
    EXPECT_EQ((i / 3) & 255, dst_pixels[mirrored][2]);
    EXPECT_EQ((i / 4) & 255, dst_pixels[mirrored][3]);
  }
  for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
    ARGBMirror(&orig_pixels[0][0], 0, &dst_pixels[0][0], 0, 1280, 1);
  }
}
784 
// Applies ARGBShade with three different shade values to known pixels and
// checks the exact results, then runs ARGBShade repeatedly over 1280 pixels
// (that output is not checked). The expectations are consistent with each
// byte of the shade value scaling the matching pixel byte, low byte first
// (0x80 halves, 0xff leaves the byte unchanged).
TEST_F(LibYUVPlanarTest, TestShade) {
  SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
  SIMD_ALIGNED(uint8 shade_pixels[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  // Inputs, 4 bytes per pixel.
  static const uint8 kInput[4][4] = {
      {10u, 20u, 40u, 80u},
      {0u, 0u, 0u, 255u},
      {0u, 0u, 0u, 0u},
      {0u, 0u, 0u, 0u},
  };
  // Expected results for shade value 0x80ffffff: only byte 3 is halved.
  static const uint8 kExpectedAlphaHalved[4][4] = {
      {10u, 20u, 40u, 40u},
      {0u, 0u, 0u, 128u},
      {0u, 0u, 0u, 0u},
      {0u, 0u, 0u, 0u},
  };
  // Expected first pixel for 0x80808080: every byte is halved.
  static const uint8 kExpectedAllHalved[4] = {5u, 10u, 20u, 40u};
  // Expected first pixel for 0x10204080: every byte ends up as 5.
  static const uint8 kExpectedPerChannel[4] = {5u, 5u, 5u, 5u};
  memcpy(orig_pixels, kInput, sizeof(kInput));

  // Do 8 pixels to allow opt version to be used.
  ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 8, 1, 0x80ffffff);
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpectedAlphaHalved[p][c], shade_pixels[p][c])
          << "pixel " << p << " channel " << c;
    }
  }

  ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 8, 1, 0x80808080);
  for (int c = 0; c < 4; ++c) {
    EXPECT_EQ(kExpectedAllHalved[c], shade_pixels[0][c]) << "channel " << c;
  }

  ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 8, 1, 0x10204080);
  for (int c = 0; c < 4; ++c) {
    EXPECT_EQ(kExpectedPerChannel[c], shade_pixels[0][c]) << "channel " << c;
  }

  for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
    ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 1280, 1,
              0x80808080);
  }
}
842 
TEST_F(LibYUVPlanarTest,TestARGBInterpolate)843 TEST_F(LibYUVPlanarTest, TestARGBInterpolate) {
844   SIMD_ALIGNED(uint8 orig_pixels_0[1280][4]);
845   SIMD_ALIGNED(uint8 orig_pixels_1[1280][4]);
846   SIMD_ALIGNED(uint8 interpolate_pixels[1280][4]);
847   memset(orig_pixels_0, 0, sizeof(orig_pixels_0));
848   memset(orig_pixels_1, 0, sizeof(orig_pixels_1));
849 
850   orig_pixels_0[0][0] = 16u;
851   orig_pixels_0[0][1] = 32u;
852   orig_pixels_0[0][2] = 64u;
853   orig_pixels_0[0][3] = 128u;
854   orig_pixels_0[1][0] = 0u;
855   orig_pixels_0[1][1] = 0u;
856   orig_pixels_0[1][2] = 0u;
857   orig_pixels_0[1][3] = 255u;
858   orig_pixels_0[2][0] = 0u;
859   orig_pixels_0[2][1] = 0u;
860   orig_pixels_0[2][2] = 0u;
861   orig_pixels_0[2][3] = 0u;
862   orig_pixels_0[3][0] = 0u;
863   orig_pixels_0[3][1] = 0u;
864   orig_pixels_0[3][2] = 0u;
865   orig_pixels_0[3][3] = 0u;
866 
867   orig_pixels_1[0][0] = 0u;
868   orig_pixels_1[0][1] = 0u;
869   orig_pixels_1[0][2] = 0u;
870   orig_pixels_1[0][3] = 0u;
871   orig_pixels_1[1][0] = 0u;
872   orig_pixels_1[1][1] = 0u;
873   orig_pixels_1[1][2] = 0u;
874   orig_pixels_1[1][3] = 0u;
875   orig_pixels_1[2][0] = 0u;
876   orig_pixels_1[2][1] = 0u;
877   orig_pixels_1[2][2] = 0u;
878   orig_pixels_1[2][3] = 0u;
879   orig_pixels_1[3][0] = 255u;
880   orig_pixels_1[3][1] = 255u;
881   orig_pixels_1[3][2] = 255u;
882   orig_pixels_1[3][3] = 255u;
883 
884   ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
885                   &interpolate_pixels[0][0], 0, 4, 1, 128);
886   EXPECT_EQ(8u, interpolate_pixels[0][0]);
887   EXPECT_EQ(16u, interpolate_pixels[0][1]);
888   EXPECT_EQ(32u, interpolate_pixels[0][2]);
889   EXPECT_EQ(64u, interpolate_pixels[0][3]);
890   EXPECT_EQ(0u, interpolate_pixels[1][0]);
891   EXPECT_EQ(0u, interpolate_pixels[1][1]);
892   EXPECT_EQ(0u, interpolate_pixels[1][2]);
893   EXPECT_EQ(128u, interpolate_pixels[1][3]);
894   EXPECT_EQ(0u, interpolate_pixels[2][0]);
895   EXPECT_EQ(0u, interpolate_pixels[2][1]);
896   EXPECT_EQ(0u, interpolate_pixels[2][2]);
897   EXPECT_EQ(0u, interpolate_pixels[2][3]);
898   EXPECT_EQ(128u, interpolate_pixels[3][0]);
899   EXPECT_EQ(128u, interpolate_pixels[3][1]);
900   EXPECT_EQ(128u, interpolate_pixels[3][2]);
901   EXPECT_EQ(128u, interpolate_pixels[3][3]);
902 
903   ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
904                   &interpolate_pixels[0][0], 0, 4, 1, 0);
905   EXPECT_EQ(16u, interpolate_pixels[0][0]);
906   EXPECT_EQ(32u, interpolate_pixels[0][1]);
907   EXPECT_EQ(64u, interpolate_pixels[0][2]);
908   EXPECT_EQ(128u, interpolate_pixels[0][3]);
909 
910   ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
911                   &interpolate_pixels[0][0], 0, 4, 1, 192);
912 
913   EXPECT_EQ(4u, interpolate_pixels[0][0]);
914   EXPECT_EQ(8u, interpolate_pixels[0][1]);
915   EXPECT_EQ(16u, interpolate_pixels[0][2]);
916   EXPECT_EQ(32u, interpolate_pixels[0][3]);
917 
918   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
919     ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
920                     &interpolate_pixels[0][0], 0, 1280, 1, 128);
921   }
922 }
923 
TEST_F(LibYUVPlanarTest,TestInterpolatePlane)924 TEST_F(LibYUVPlanarTest, TestInterpolatePlane) {
925   SIMD_ALIGNED(uint8 orig_pixels_0[1280]);
926   SIMD_ALIGNED(uint8 orig_pixels_1[1280]);
927   SIMD_ALIGNED(uint8 interpolate_pixels[1280]);
928   memset(orig_pixels_0, 0, sizeof(orig_pixels_0));
929   memset(orig_pixels_1, 0, sizeof(orig_pixels_1));
930 
931   orig_pixels_0[0] = 16u;
932   orig_pixels_0[1] = 32u;
933   orig_pixels_0[2] = 64u;
934   orig_pixels_0[3] = 128u;
935   orig_pixels_0[4] = 0u;
936   orig_pixels_0[5] = 0u;
937   orig_pixels_0[6] = 0u;
938   orig_pixels_0[7] = 255u;
939   orig_pixels_0[8] = 0u;
940   orig_pixels_0[9] = 0u;
941   orig_pixels_0[10] = 0u;
942   orig_pixels_0[11] = 0u;
943   orig_pixels_0[12] = 0u;
944   orig_pixels_0[13] = 0u;
945   orig_pixels_0[14] = 0u;
946   orig_pixels_0[15] = 0u;
947 
948   orig_pixels_1[0] = 0u;
949   orig_pixels_1[1] = 0u;
950   orig_pixels_1[2] = 0u;
951   orig_pixels_1[3] = 0u;
952   orig_pixels_1[4] = 0u;
953   orig_pixels_1[5] = 0u;
954   orig_pixels_1[6] = 0u;
955   orig_pixels_1[7] = 0u;
956   orig_pixels_1[8] = 0u;
957   orig_pixels_1[9] = 0u;
958   orig_pixels_1[10] = 0u;
959   orig_pixels_1[11] = 0u;
960   orig_pixels_1[12] = 255u;
961   orig_pixels_1[13] = 255u;
962   orig_pixels_1[14] = 255u;
963   orig_pixels_1[15] = 255u;
964 
965   InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
966                    &interpolate_pixels[0], 0, 16, 1, 128);
967   EXPECT_EQ(8u, interpolate_pixels[0]);
968   EXPECT_EQ(16u, interpolate_pixels[1]);
969   EXPECT_EQ(32u, interpolate_pixels[2]);
970   EXPECT_EQ(64u, interpolate_pixels[3]);
971   EXPECT_EQ(0u, interpolate_pixels[4]);
972   EXPECT_EQ(0u, interpolate_pixels[5]);
973   EXPECT_EQ(0u, interpolate_pixels[6]);
974   EXPECT_EQ(128u, interpolate_pixels[7]);
975   EXPECT_EQ(0u, interpolate_pixels[8]);
976   EXPECT_EQ(0u, interpolate_pixels[9]);
977   EXPECT_EQ(0u, interpolate_pixels[10]);
978   EXPECT_EQ(0u, interpolate_pixels[11]);
979   EXPECT_EQ(128u, interpolate_pixels[12]);
980   EXPECT_EQ(128u, interpolate_pixels[13]);
981   EXPECT_EQ(128u, interpolate_pixels[14]);
982   EXPECT_EQ(128u, interpolate_pixels[15]);
983 
984   InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
985                    &interpolate_pixels[0], 0, 16, 1, 0);
986   EXPECT_EQ(16u, interpolate_pixels[0]);
987   EXPECT_EQ(32u, interpolate_pixels[1]);
988   EXPECT_EQ(64u, interpolate_pixels[2]);
989   EXPECT_EQ(128u, interpolate_pixels[3]);
990 
991   InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
992                    &interpolate_pixels[0], 0, 16, 1, 192);
993 
994   EXPECT_EQ(4u, interpolate_pixels[0]);
995   EXPECT_EQ(8u, interpolate_pixels[1]);
996   EXPECT_EQ(16u, interpolate_pixels[2]);
997   EXPECT_EQ(32u, interpolate_pixels[3]);
998 
999   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
1000     InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
1001                      &interpolate_pixels[0], 0, 1280, 1, 123);
1002   }
1003 }
1004 
// Generates a TEST_F named ARGBInterpolate<TERP><N>.
// The test fills two source images with fastrand() bytes, runs
// ARGBInterpolate once after MaskCpuFlags(disable_cpu_flags_) and
// benchmark_iterations_ times after MaskCpuFlags(benchmark_cpu_info_), and
// expects the two destination buffers to match byte for byte.
//   BPP_A / BPP_B       bytes per pixel of the sources / destination.
//   STRIDE_A / STRIDE_B row sizes are rounded up to a multiple of these.
//   W1280               image width; values below 1 are replaced by 1.
//   TERP                interpolation value passed to ARGBInterpolate.
//   N                   suffix appended to the test name.
//   NEG                 sign token placed in front of the height.
//   OFF                 byte offset added to both source pointers.
// FMT_A and FMT_B are not referenced by the generated body.
#define TESTTERP(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, W1280, TERP, \
                 N, NEG, OFF)                                                 \
  TEST_F(LibYUVPlanarTest, ARGBInterpolate##TERP##N) {                        \
    const int kCols = ((W1280) > 0) ? (W1280) : 1;                            \
    const int kRows = benchmark_height_;                                      \
    const int kSrcPitch =                                                     \
        (kCols * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A;                 \
    const int kDstPitch =                                                     \
        (kCols * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B;                 \
    const int kSrcBytes = kSrcPitch * kRows;                                  \
    const int kDstBytes = kDstPitch * kRows;                                  \
    align_buffer_page_end(first_src, kSrcBytes + OFF);                        \
    align_buffer_page_end(second_src, kSrcBytes + OFF);                       \
    align_buffer_page_end(reference_dst, kDstBytes);                          \
    align_buffer_page_end(optimized_dst, kDstBytes);                          \
    for (int n = 0; n < kSrcBytes; ++n) {                                     \
      first_src[n + OFF] = (fastrand() & 0xff);                               \
      second_src[n + OFF] = (fastrand() & 0xff);                              \
    }                                                                         \
    MaskCpuFlags(disable_cpu_flags_);                                         \
    ARGBInterpolate(first_src + OFF, kSrcPitch, second_src + OFF, kSrcPitch,  \
                    reference_dst, kDstPitch, kCols, NEG kRows, TERP);        \
    MaskCpuFlags(benchmark_cpu_info_);                                        \
    for (int pass = 0; pass < benchmark_iterations_; ++pass) {                \
      ARGBInterpolate(first_src + OFF, kSrcPitch, second_src + OFF,           \
                      kSrcPitch, optimized_dst, kDstPitch, kCols, NEG kRows,  \
                      TERP);                                                  \
    }                                                                         \
    for (int n = 0; n < kDstBytes; ++n) {                                     \
      EXPECT_EQ(reference_dst[n], optimized_dst[n]);                          \
    }                                                                         \
    free_aligned_buffer_page_end(first_src);                                  \
    free_aligned_buffer_page_end(second_src);                                 \
    free_aligned_buffer_page_end(reference_dst);                              \
    free_aligned_buffer_page_end(optimized_dst);                              \
  }
1038 
1039 #define TESTINTERPOLATE(TERP)                                                \
1040   TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_ - 1, TERP, _Any, +, 0)   \
1041   TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Unaligned, +, 1) \
1042   TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Invert, -, 0)    \
1043   TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Opt, +, 0)
1044 
1045 TESTINTERPOLATE(0)
1046 TESTINTERPOLATE(64)
1047 TESTINTERPOLATE(128)
1048 TESTINTERPOLATE(192)
1049 TESTINTERPOLATE(255)
1050 
TestBlend(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1051 static int TestBlend(int width,
1052                      int height,
1053                      int benchmark_iterations,
1054                      int disable_cpu_flags,
1055                      int benchmark_cpu_info,
1056                      int invert,
1057                      int off) {
1058   if (width < 1) {
1059     width = 1;
1060   }
1061   const int kBpp = 4;
1062   const int kStride = width * kBpp;
1063   align_buffer_page_end(src_argb_a, kStride * height + off);
1064   align_buffer_page_end(src_argb_b, kStride * height + off);
1065   align_buffer_page_end(dst_argb_c, kStride * height);
1066   align_buffer_page_end(dst_argb_opt, kStride * height);
1067   for (int i = 0; i < kStride * height; ++i) {
1068     src_argb_a[i + off] = (fastrand() & 0xff);
1069     src_argb_b[i + off] = (fastrand() & 0xff);
1070   }
1071   ARGBAttenuate(src_argb_a + off, kStride, src_argb_a + off, kStride, width,
1072                 height);
1073   ARGBAttenuate(src_argb_b + off, kStride, src_argb_b + off, kStride, width,
1074                 height);
1075   memset(dst_argb_c, 255, kStride * height);
1076   memset(dst_argb_opt, 255, kStride * height);
1077 
1078   MaskCpuFlags(disable_cpu_flags);
1079   ARGBBlend(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
1080             kStride, width, invert * height);
1081   MaskCpuFlags(benchmark_cpu_info);
1082   for (int i = 0; i < benchmark_iterations; ++i) {
1083     ARGBBlend(src_argb_a + off, kStride, src_argb_b + off, kStride,
1084               dst_argb_opt, kStride, width, invert * height);
1085   }
1086   int max_diff = 0;
1087   for (int i = 0; i < kStride * height; ++i) {
1088     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1089                        static_cast<int>(dst_argb_opt[i]));
1090     if (abs_diff > max_diff) {
1091       max_diff = abs_diff;
1092     }
1093   }
1094   free_aligned_buffer_page_end(src_argb_a);
1095   free_aligned_buffer_page_end(src_argb_b);
1096   free_aligned_buffer_page_end(dst_argb_c);
1097   free_aligned_buffer_page_end(dst_argb_opt);
1098   return max_diff;
1099 }
1100 
// ARGBBlend with the width reduced by 4 pixels; the two runs may differ by at
// most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBBlend_Any) {
  const int kWidth = benchmark_width_ - 4;
  const int worst_delta =
      TestBlend(kWidth, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  EXPECT_LE(worst_delta, 1);
}
1107 
// ARGBBlend with the source pointers offset by one byte; the two runs may
// differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBBlend_Unaligned) {
  const int kSourceOffset = 1;
  const int worst_delta =
      TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, +1, kSourceOffset);
  EXPECT_LE(worst_delta, 1);
}
1114 
// ARGBBlend with a negated height; the two runs may differ by at most 1 per
// byte.
TEST_F(LibYUVPlanarTest, ARGBBlend_Invert) {
  const int kInvert = -1;
  const int worst_delta =
      TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, 0);
  EXPECT_LE(worst_delta, 1);
}
1121 
// ARGBBlend at the full benchmark size with no source offset; the two runs
// may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBBlend_Opt) {
  const int worst_delta =
      TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  EXPECT_LE(worst_delta, 1);
}
1128 
TestBlendPlane(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1129 static void TestBlendPlane(int width,
1130                            int height,
1131                            int benchmark_iterations,
1132                            int disable_cpu_flags,
1133                            int benchmark_cpu_info,
1134                            int invert,
1135                            int off) {
1136   if (width < 1) {
1137     width = 1;
1138   }
1139   const int kBpp = 1;
1140   const int kStride = width * kBpp;
1141   align_buffer_page_end(src_argb_a, kStride * height + off);
1142   align_buffer_page_end(src_argb_b, kStride * height + off);
1143   align_buffer_page_end(src_argb_alpha, kStride * height + off);
1144   align_buffer_page_end(dst_argb_c, kStride * height + off);
1145   align_buffer_page_end(dst_argb_opt, kStride * height + off);
1146   memset(dst_argb_c, 255, kStride * height + off);
1147   memset(dst_argb_opt, 255, kStride * height + off);
1148 
1149   // Test source is maintained exactly if alpha is 255.
1150   for (int i = 0; i < width; ++i) {
1151     src_argb_a[i + off] = i & 255;
1152     src_argb_b[i + off] = 255 - (i & 255);
1153   }
1154   memset(src_argb_alpha + off, 255, width);
1155   BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
1156              src_argb_alpha + off, width, dst_argb_opt + off, width, width, 1);
1157   for (int i = 0; i < width; ++i) {
1158     EXPECT_EQ(src_argb_a[i + off], dst_argb_opt[i + off]);
1159   }
1160   // Test destination is maintained exactly if alpha is 0.
1161   memset(src_argb_alpha + off, 0, width);
1162   BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
1163              src_argb_alpha + off, width, dst_argb_opt + off, width, width, 1);
1164   for (int i = 0; i < width; ++i) {
1165     EXPECT_EQ(src_argb_b[i + off], dst_argb_opt[i + off]);
1166   }
1167   for (int i = 0; i < kStride * height; ++i) {
1168     src_argb_a[i + off] = (fastrand() & 0xff);
1169     src_argb_b[i + off] = (fastrand() & 0xff);
1170     src_argb_alpha[i + off] = (fastrand() & 0xff);
1171   }
1172 
1173   MaskCpuFlags(disable_cpu_flags);
1174   BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
1175              src_argb_alpha + off, width, dst_argb_c + off, width, width,
1176              invert * height);
1177   MaskCpuFlags(benchmark_cpu_info);
1178   for (int i = 0; i < benchmark_iterations; ++i) {
1179     BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
1180                src_argb_alpha + off, width, dst_argb_opt + off, width, width,
1181                invert * height);
1182   }
1183   for (int i = 0; i < kStride * height; ++i) {
1184     EXPECT_EQ(dst_argb_c[i + off], dst_argb_opt[i + off]);
1185   }
1186   free_aligned_buffer_page_end(src_argb_a);
1187   free_aligned_buffer_page_end(src_argb_b);
1188   free_aligned_buffer_page_end(src_argb_alpha);
1189   free_aligned_buffer_page_end(dst_argb_c);
1190   free_aligned_buffer_page_end(dst_argb_opt);
1191   return;
1192 }
1193 
// BlendPlane at the full benchmark size with no buffer offset.
TEST_F(LibYUVPlanarTest, BlendPlane_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
// BlendPlane with every buffer pointer offset by one byte.
TEST_F(LibYUVPlanarTest, BlendPlane_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
// BlendPlane with the width reduced by 4 and every buffer pointer offset by
// one byte.
TEST_F(LibYUVPlanarTest, BlendPlane_Any) {
  const int kWidth = benchmark_width_ - 4;
  const int kInvert = +1;
  const int kOffset = 1;
  TestBlendPlane(kWidth, benchmark_height_, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
// BlendPlane with a negated height and every buffer pointer offset by one
// byte.
TEST_F(LibYUVPlanarTest, BlendPlane_Invert) {
  const int kInvert = -1;
  const int kOffset = 1;
  TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
1210 
// Divides v by a, rounding up (for positive integer operands).
#define SUBSAMPLE(v, a) (((v) + (a) - 1) / (a))
1212 
TestI420Blend(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1213 static void TestI420Blend(int width,
1214                           int height,
1215                           int benchmark_iterations,
1216                           int disable_cpu_flags,
1217                           int benchmark_cpu_info,
1218                           int invert,
1219                           int off) {
1220   width = ((width) > 0) ? (width) : 1;
1221   const int kStrideUV = SUBSAMPLE(width, 2);
1222   const int kSizeUV = kStrideUV * SUBSAMPLE(height, 2);
1223   align_buffer_page_end(src_y0, width * height + off);
1224   align_buffer_page_end(src_u0, kSizeUV + off);
1225   align_buffer_page_end(src_v0, kSizeUV + off);
1226   align_buffer_page_end(src_y1, width * height + off);
1227   align_buffer_page_end(src_u1, kSizeUV + off);
1228   align_buffer_page_end(src_v1, kSizeUV + off);
1229   align_buffer_page_end(src_a, width * height + off);
1230   align_buffer_page_end(dst_y_c, width * height + off);
1231   align_buffer_page_end(dst_u_c, kSizeUV + off);
1232   align_buffer_page_end(dst_v_c, kSizeUV + off);
1233   align_buffer_page_end(dst_y_opt, width * height + off);
1234   align_buffer_page_end(dst_u_opt, kSizeUV + off);
1235   align_buffer_page_end(dst_v_opt, kSizeUV + off);
1236 
1237   MemRandomize(src_y0, width * height + off);
1238   MemRandomize(src_u0, kSizeUV + off);
1239   MemRandomize(src_v0, kSizeUV + off);
1240   MemRandomize(src_y1, width * height + off);
1241   MemRandomize(src_u1, kSizeUV + off);
1242   MemRandomize(src_v1, kSizeUV + off);
1243   MemRandomize(src_a, width * height + off);
1244   memset(dst_y_c, 255, width * height + off);
1245   memset(dst_u_c, 255, kSizeUV + off);
1246   memset(dst_v_c, 255, kSizeUV + off);
1247   memset(dst_y_opt, 255, width * height + off);
1248   memset(dst_u_opt, 255, kSizeUV + off);
1249   memset(dst_v_opt, 255, kSizeUV + off);
1250 
1251   MaskCpuFlags(disable_cpu_flags);
1252   I420Blend(src_y0 + off, width, src_u0 + off, kStrideUV, src_v0 + off,
1253             kStrideUV, src_y1 + off, width, src_u1 + off, kStrideUV,
1254             src_v1 + off, kStrideUV, src_a + off, width, dst_y_c + off, width,
1255             dst_u_c + off, kStrideUV, dst_v_c + off, kStrideUV, width,
1256             invert * height);
1257   MaskCpuFlags(benchmark_cpu_info);
1258   for (int i = 0; i < benchmark_iterations; ++i) {
1259     I420Blend(src_y0 + off, width, src_u0 + off, kStrideUV, src_v0 + off,
1260               kStrideUV, src_y1 + off, width, src_u1 + off, kStrideUV,
1261               src_v1 + off, kStrideUV, src_a + off, width, dst_y_opt + off,
1262               width, dst_u_opt + off, kStrideUV, dst_v_opt + off, kStrideUV,
1263               width, invert * height);
1264   }
1265   for (int i = 0; i < width * height; ++i) {
1266     EXPECT_EQ(dst_y_c[i + off], dst_y_opt[i + off]);
1267   }
1268   for (int i = 0; i < kSizeUV; ++i) {
1269     EXPECT_EQ(dst_u_c[i + off], dst_u_opt[i + off]);
1270     EXPECT_EQ(dst_v_c[i + off], dst_v_opt[i + off]);
1271   }
1272   free_aligned_buffer_page_end(src_y0);
1273   free_aligned_buffer_page_end(src_u0);
1274   free_aligned_buffer_page_end(src_v0);
1275   free_aligned_buffer_page_end(src_y1);
1276   free_aligned_buffer_page_end(src_u1);
1277   free_aligned_buffer_page_end(src_v1);
1278   free_aligned_buffer_page_end(src_a);
1279   free_aligned_buffer_page_end(dst_y_c);
1280   free_aligned_buffer_page_end(dst_u_c);
1281   free_aligned_buffer_page_end(dst_v_c);
1282   free_aligned_buffer_page_end(dst_y_opt);
1283   free_aligned_buffer_page_end(dst_u_opt);
1284   free_aligned_buffer_page_end(dst_v_opt);
1285   return;
1286 }
1287 
// I420Blend at the full benchmark size with no plane offset.
TEST_F(LibYUVPlanarTest, I420Blend_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
// I420Blend with every plane pointer offset by one byte.
TEST_F(LibYUVPlanarTest, I420Blend_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
1296 
1297 // TODO(fbarchard): DISABLED because _Any uses C.  Avoid C and re-enable.
TEST_F(LibYUVPlanarTest,DISABLED_I420Blend_Any)1298 TEST_F(LibYUVPlanarTest, DISABLED_I420Blend_Any) {
1299   TestI420Blend(benchmark_width_ - 4, benchmark_height_, benchmark_iterations_,
1300                 disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
1301 }
// I420Blend with a negated height and no plane offset.
TEST_F(LibYUVPlanarTest, I420Blend_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
1306 
// Checks ARGBAffineRow_C against hand-computed values and, when the SSE2 row
// is compiled in and the CPU reports SSE2, checks that ARGBAffineRow_SSE2
// produces the same bytes and benchmarks it.
TEST_F(LibYUVPlanarTest, TestAffine) {
  SIMD_ALIGNED(uint8 orig_pixels_0[1280][4]);
  SIMD_ALIGNED(uint8 interpolate_pixels_C[1280][4]);

  // Every byte of pixel i holds i, truncated to 8 bits by the assignment.
  for (int i = 0; i < 1280; ++i) {
    for (int j = 0; j < 4; ++j) {
      orig_pixels_0[i][j] = i;
    }
  }

  // NOTE(review): presumably {u, v, du, dv}; the expectations below match a
  // source index of 0.75 * destination index - confirm against the row
  // function's documentation.
  float uv_step[4] = {0.f, 0.f, 0.75f, 0.f};

  ARGBAffineRow_C(&orig_pixels_0[0][0], 0, &interpolate_pixels_C[0][0], uv_step,
                  1280);
  EXPECT_EQ(0u, interpolate_pixels_C[0][0]);
  EXPECT_EQ(96u, interpolate_pixels_C[128][0]);
  EXPECT_EQ(191u, interpolate_pixels_C[255][3]);

#if defined(HAS_ARGBAFFINEROW_SSE2)
  // Only execute the SSE2 row when the CPU reports SSE2 at runtime. The row
  // used to be called before this check, which guarded the benchmark loop
  // only.
  if (TestCpuFlag(kCpuHasSSE2)) {
    SIMD_ALIGNED(uint8 interpolate_pixels_Opt[1280][4]);
    ARGBAffineRow_SSE2(&orig_pixels_0[0][0], 0, &interpolate_pixels_Opt[0][0],
                       uv_step, 1280);
    EXPECT_EQ(0,
              memcmp(interpolate_pixels_Opt, interpolate_pixels_C, 1280 * 4));

    for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
      ARGBAffineRow_SSE2(&orig_pixels_0[0][0], 0, &interpolate_pixels_Opt[0][0],
                         uv_step, 1280);
    }
  }
#endif
}
1340 
// Copies the interior of a bordered plane with CopyPlane, first after
// MaskCpuFlags(disable_cpu_flags_) and then after
// MaskCpuFlags(benchmark_cpu_info_), and expects both destination buffers to
// end up identical.
TEST_F(LibYUVPlanarTest, TestCopyPlane) {
  const int kBorder = 12;
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kPaddedWidth = kWidth + kBorder * 2;
  const int kPlaneBytes = kPaddedWidth * (kHeight + kBorder * 2);

  align_buffer_page_end(orig_y, kPlaneBytes);
  align_buffer_page_end(dst_c, kPlaneBytes);
  align_buffer_page_end(dst_opt, kPlaneBytes);

  memset(orig_y, 0, kPlaneBytes);
  memset(dst_c, 0, kPlaneBytes);
  memset(dst_opt, 0, kPlaneBytes);

  // Fill the interior of the source with random data; the border stays zero.
  for (int row = kBorder; row < kHeight + kBorder; ++row) {
    uint8* const line = orig_y + row * kPaddedWidth;
    for (int col = kBorder; col < kWidth + kBorder; ++col) {
      line[col] = fastrand() & 0xff;
    }
  }

  // Fill both destination buffers with the same random data.
  for (int n = 0; n < kPlaneBytes; ++n) {
    const uint8 filler = fastrand() & 0x7f;
    dst_c[n] = filler;
    dst_opt[n] = filler;
  }

  // Offset of the first interior byte.
  const int kInteriorOffset = kBorder * kPaddedWidth + kBorder;
  const int kSrcStride = kPaddedWidth;
  // NOTE(review): the destination stride is 8 while each row is kWidth bytes
  // wide, so destination rows overlap whenever kWidth > 8 - confirm this is
  // intended.
  const int kDstStride = 8;

  // Disable all optimizations.
  MaskCpuFlags(disable_cpu_flags_);
  for (int pass = 0; pass < benchmark_iterations_; pass++) {
    CopyPlane(orig_y + kInteriorOffset, kSrcStride, dst_c + kInteriorOffset,
              kDstStride, kWidth, kHeight);
  }

  // Enable optimizations.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int pass = 0; pass < benchmark_iterations_; pass++) {
    CopyPlane(orig_y + kInteriorOffset, kSrcStride, dst_opt + kInteriorOffset,
              kDstStride, kWidth, kHeight);
  }

  // Count bytes that differ anywhere in the two destination buffers.
  int mismatches = 0;
  for (int n = 0; n < kPlaneBytes; ++n) {
    mismatches += (dst_c[n] != dst_opt[n]) ? 1 : 0;
  }

  free_aligned_buffer_page_end(orig_y);
  free_aligned_buffer_page_end(dst_c);
  free_aligned_buffer_page_end(dst_opt);

  EXPECT_EQ(0, mismatches);
}
1399 
TestMultiply(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1400 static int TestMultiply(int width,
1401                         int height,
1402                         int benchmark_iterations,
1403                         int disable_cpu_flags,
1404                         int benchmark_cpu_info,
1405                         int invert,
1406                         int off) {
1407   if (width < 1) {
1408     width = 1;
1409   }
1410   const int kBpp = 4;
1411   const int kStride = width * kBpp;
1412   align_buffer_page_end(src_argb_a, kStride * height + off);
1413   align_buffer_page_end(src_argb_b, kStride * height + off);
1414   align_buffer_page_end(dst_argb_c, kStride * height);
1415   align_buffer_page_end(dst_argb_opt, kStride * height);
1416   for (int i = 0; i < kStride * height; ++i) {
1417     src_argb_a[i + off] = (fastrand() & 0xff);
1418     src_argb_b[i + off] = (fastrand() & 0xff);
1419   }
1420   memset(dst_argb_c, 0, kStride * height);
1421   memset(dst_argb_opt, 0, kStride * height);
1422 
1423   MaskCpuFlags(disable_cpu_flags);
1424   ARGBMultiply(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
1425                kStride, width, invert * height);
1426   MaskCpuFlags(benchmark_cpu_info);
1427   for (int i = 0; i < benchmark_iterations; ++i) {
1428     ARGBMultiply(src_argb_a + off, kStride, src_argb_b + off, kStride,
1429                  dst_argb_opt, kStride, width, invert * height);
1430   }
1431   int max_diff = 0;
1432   for (int i = 0; i < kStride * height; ++i) {
1433     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1434                        static_cast<int>(dst_argb_opt[i]));
1435     if (abs_diff > max_diff) {
1436       max_diff = abs_diff;
1437     }
1438   }
1439   free_aligned_buffer_page_end(src_argb_a);
1440   free_aligned_buffer_page_end(src_argb_b);
1441   free_aligned_buffer_page_end(dst_argb_c);
1442   free_aligned_buffer_page_end(dst_argb_opt);
1443   return max_diff;
1444 }
1445 
// ARGBMultiply with the width reduced by 1 pixel; the two runs may differ by
// at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBMultiply_Any) {
  const int kWidth = benchmark_width_ - 1;
  const int worst_delta =
      TestMultiply(kWidth, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  EXPECT_LE(worst_delta, 1);
}
1452 
// ARGBMultiply with the source pointers offset by one byte; the two runs may
// differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBMultiply_Unaligned) {
  const int kSourceOffset = 1;
  const int worst_delta =
      TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, +1, kSourceOffset);
  EXPECT_LE(worst_delta, 1);
}
1459 
// ARGBMultiply with a negated height; the two runs may differ by at most 1
// per byte.
TEST_F(LibYUVPlanarTest, ARGBMultiply_Invert) {
  const int kInvert = -1;
  const int worst_delta =
      TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, kInvert, 0);
  EXPECT_LE(worst_delta, 1);
}
1466 
// ARGBMultiply at the full benchmark size with no source offset; the two runs
// may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBMultiply_Opt) {
  const int worst_delta =
      TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  EXPECT_LE(worst_delta, 1);
}
1473 
TestAdd(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1474 static int TestAdd(int width,
1475                    int height,
1476                    int benchmark_iterations,
1477                    int disable_cpu_flags,
1478                    int benchmark_cpu_info,
1479                    int invert,
1480                    int off) {
1481   if (width < 1) {
1482     width = 1;
1483   }
1484   const int kBpp = 4;
1485   const int kStride = width * kBpp;
1486   align_buffer_page_end(src_argb_a, kStride * height + off);
1487   align_buffer_page_end(src_argb_b, kStride * height + off);
1488   align_buffer_page_end(dst_argb_c, kStride * height);
1489   align_buffer_page_end(dst_argb_opt, kStride * height);
1490   for (int i = 0; i < kStride * height; ++i) {
1491     src_argb_a[i + off] = (fastrand() & 0xff);
1492     src_argb_b[i + off] = (fastrand() & 0xff);
1493   }
1494   memset(dst_argb_c, 0, kStride * height);
1495   memset(dst_argb_opt, 0, kStride * height);
1496 
1497   MaskCpuFlags(disable_cpu_flags);
1498   ARGBAdd(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
1499           kStride, width, invert * height);
1500   MaskCpuFlags(benchmark_cpu_info);
1501   for (int i = 0; i < benchmark_iterations; ++i) {
1502     ARGBAdd(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_opt,
1503             kStride, width, invert * height);
1504   }
1505   int max_diff = 0;
1506   for (int i = 0; i < kStride * height; ++i) {
1507     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1508                        static_cast<int>(dst_argb_opt[i]));
1509     if (abs_diff > max_diff) {
1510       max_diff = abs_diff;
1511     }
1512   }
1513   free_aligned_buffer_page_end(src_argb_a);
1514   free_aligned_buffer_page_end(src_argb_b);
1515   free_aligned_buffer_page_end(dst_argb_c);
1516   free_aligned_buffer_page_end(dst_argb_opt);
1517   return max_diff;
1518 }
1519 
// ARGBAdd with the width reduced by 1 pixel; the two runs may differ by at
// most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBAdd_Any) {
  const int kWidth = benchmark_width_ - 1;
  const int worst_delta =
      TestAdd(kWidth, benchmark_height_, benchmark_iterations_,
              disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  EXPECT_LE(worst_delta, 1);
}
1526 
// ARGBAdd with the source pointers offset by one byte; the two runs may
// differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBAdd_Unaligned) {
  const int kSourceOffset = 1;
  const int worst_delta =
      TestAdd(benchmark_width_, benchmark_height_, benchmark_iterations_,
              disable_cpu_flags_, benchmark_cpu_info_, +1, kSourceOffset);
  EXPECT_LE(worst_delta, 1);
}
1533 
// ARGBAdd with a negated height; the two runs may differ by at most 1 per
// byte.
TEST_F(LibYUVPlanarTest, ARGBAdd_Invert) {
  const int kInvert = -1;
  const int worst_delta =
      TestAdd(benchmark_width_, benchmark_height_, benchmark_iterations_,
              disable_cpu_flags_, benchmark_cpu_info_, kInvert, 0);
  EXPECT_LE(worst_delta, 1);
}
1540 
// ARGBAdd at the full benchmark size with no source offset; the two runs may
// differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBAdd_Opt) {
  const int worst_delta =
      TestAdd(benchmark_width_, benchmark_height_, benchmark_iterations_,
              disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  EXPECT_LE(worst_delta, 1);
}
1547 
TestSubtract(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1548 static int TestSubtract(int width,
1549                         int height,
1550                         int benchmark_iterations,
1551                         int disable_cpu_flags,
1552                         int benchmark_cpu_info,
1553                         int invert,
1554                         int off) {
1555   if (width < 1) {
1556     width = 1;
1557   }
1558   const int kBpp = 4;
1559   const int kStride = width * kBpp;
1560   align_buffer_page_end(src_argb_a, kStride * height + off);
1561   align_buffer_page_end(src_argb_b, kStride * height + off);
1562   align_buffer_page_end(dst_argb_c, kStride * height);
1563   align_buffer_page_end(dst_argb_opt, kStride * height);
1564   for (int i = 0; i < kStride * height; ++i) {
1565     src_argb_a[i + off] = (fastrand() & 0xff);
1566     src_argb_b[i + off] = (fastrand() & 0xff);
1567   }
1568   memset(dst_argb_c, 0, kStride * height);
1569   memset(dst_argb_opt, 0, kStride * height);
1570 
1571   MaskCpuFlags(disable_cpu_flags);
1572   ARGBSubtract(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
1573                kStride, width, invert * height);
1574   MaskCpuFlags(benchmark_cpu_info);
1575   for (int i = 0; i < benchmark_iterations; ++i) {
1576     ARGBSubtract(src_argb_a + off, kStride, src_argb_b + off, kStride,
1577                  dst_argb_opt, kStride, width, invert * height);
1578   }
1579   int max_diff = 0;
1580   for (int i = 0; i < kStride * height; ++i) {
1581     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1582                        static_cast<int>(dst_argb_opt[i]));
1583     if (abs_diff > max_diff) {
1584       max_diff = abs_diff;
1585     }
1586   }
1587   free_aligned_buffer_page_end(src_argb_a);
1588   free_aligned_buffer_page_end(src_argb_b);
1589   free_aligned_buffer_page_end(dst_argb_c);
1590   free_aligned_buffer_page_end(dst_argb_opt);
1591   return max_diff;
1592 }
1593 
// ARGBSubtract on an image one pixel narrower than the benchmark width; the
// two outputs compared by TestSubtract may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBSubtract_Any) {
  const int kWidth = benchmark_width_ - 1;
  const int kInvert = +1;  // height is passed through with its sign unchanged
  const int kOff = 0;      // no extra byte offset on the source pointers
  const int max_diff =
      TestSubtract(kWidth, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
1600 
// ARGBSubtract with both source pointers shifted by one byte; the two outputs
// compared by TestSubtract may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBSubtract_Unaligned) {
  const int kInvert = +1;  // height is passed through with its sign unchanged
  const int kOff = 1;      // sources start one byte into their buffers
  const int max_diff = TestSubtract(benchmark_width_, benchmark_height_,
                                    benchmark_iterations_, disable_cpu_flags_,
                                    benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
1607 
// ARGBSubtract called with a negated height; the two outputs compared by
// TestSubtract may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBSubtract_Invert) {
  const int kInvert = -1;  // ARGBSubtract receives -benchmark_height_
  const int kOff = 0;      // no extra byte offset on the source pointers
  const int max_diff = TestSubtract(benchmark_width_, benchmark_height_,
                                    benchmark_iterations_, disable_cpu_flags_,
                                    benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
1614 
// ARGBSubtract at the full benchmark size with no offset and positive height;
// the two outputs compared by TestSubtract may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBSubtract_Opt) {
  const int kInvert = +1;  // height is passed through with its sign unchanged
  const int kOff = 0;      // no extra byte offset on the source pointers
  const int max_diff = TestSubtract(benchmark_width_, benchmark_height_,
                                    benchmark_iterations_, disable_cpu_flags_,
                                    benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
1621 
TestSobel(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1622 static int TestSobel(int width,
1623                      int height,
1624                      int benchmark_iterations,
1625                      int disable_cpu_flags,
1626                      int benchmark_cpu_info,
1627                      int invert,
1628                      int off) {
1629   if (width < 1) {
1630     width = 1;
1631   }
1632   const int kBpp = 4;
1633   const int kStride = width * kBpp;
1634   align_buffer_page_end(src_argb_a, kStride * height + off);
1635   align_buffer_page_end(dst_argb_c, kStride * height);
1636   align_buffer_page_end(dst_argb_opt, kStride * height);
1637   memset(src_argb_a, 0, kStride * height + off);
1638   for (int i = 0; i < kStride * height; ++i) {
1639     src_argb_a[i + off] = (fastrand() & 0xff);
1640   }
1641   memset(dst_argb_c, 0, kStride * height);
1642   memset(dst_argb_opt, 0, kStride * height);
1643 
1644   MaskCpuFlags(disable_cpu_flags);
1645   ARGBSobel(src_argb_a + off, kStride, dst_argb_c, kStride, width,
1646             invert * height);
1647   MaskCpuFlags(benchmark_cpu_info);
1648   for (int i = 0; i < benchmark_iterations; ++i) {
1649     ARGBSobel(src_argb_a + off, kStride, dst_argb_opt, kStride, width,
1650               invert * height);
1651   }
1652   int max_diff = 0;
1653   for (int i = 0; i < kStride * height; ++i) {
1654     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1655                        static_cast<int>(dst_argb_opt[i]));
1656     if (abs_diff > max_diff) {
1657       max_diff = abs_diff;
1658     }
1659   }
1660   free_aligned_buffer_page_end(src_argb_a);
1661   free_aligned_buffer_page_end(dst_argb_c);
1662   free_aligned_buffer_page_end(dst_argb_opt);
1663   return max_diff;
1664 }
1665 
// ARGBSobel on an image one pixel narrower than the benchmark width; the two
// outputs compared by TestSobel must match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobel_Any) {
  const int kWidth = benchmark_width_ - 1;
  const int kInvert = +1;  // height is passed through with its sign unchanged
  const int kOff = 0;      // no extra byte offset on the source pointer
  const int max_diff =
      TestSobel(kWidth, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
1672 
// ARGBSobel with the source pointer shifted by one byte; the two outputs
// compared by TestSobel must match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobel_Unaligned) {
  const int kInvert = +1;  // height is passed through with its sign unchanged
  const int kOff = 1;      // source starts one byte into its buffer
  const int max_diff = TestSobel(benchmark_width_, benchmark_height_,
                                 benchmark_iterations_, disable_cpu_flags_,
                                 benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
1679 
// ARGBSobel called with a negated height; the two outputs compared by
// TestSobel must match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobel_Invert) {
  const int kInvert = -1;  // ARGBSobel receives -benchmark_height_
  const int kOff = 0;      // no extra byte offset on the source pointer
  const int max_diff = TestSobel(benchmark_width_, benchmark_height_,
                                 benchmark_iterations_, disable_cpu_flags_,
                                 benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
1686 
// ARGBSobel at the full benchmark size with no offset and positive height; the
// two outputs compared by TestSobel must match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobel_Opt) {
  const int kInvert = +1;  // height is passed through with its sign unchanged
  const int kOff = 0;      // no extra byte offset on the source pointer
  const int max_diff = TestSobel(benchmark_width_, benchmark_height_,
                                 benchmark_iterations_, disable_cpu_flags_,
                                 benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
1693 
TestSobelToPlane(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1694 static int TestSobelToPlane(int width,
1695                             int height,
1696                             int benchmark_iterations,
1697                             int disable_cpu_flags,
1698                             int benchmark_cpu_info,
1699                             int invert,
1700                             int off) {
1701   if (width < 1) {
1702     width = 1;
1703   }
1704   const int kSrcBpp = 4;
1705   const int kDstBpp = 1;
1706   const int kSrcStride = (width * kSrcBpp + 15) & ~15;
1707   const int kDstStride = (width * kDstBpp + 15) & ~15;
1708   align_buffer_page_end(src_argb_a, kSrcStride * height + off);
1709   align_buffer_page_end(dst_argb_c, kDstStride * height);
1710   align_buffer_page_end(dst_argb_opt, kDstStride * height);
1711   memset(src_argb_a, 0, kSrcStride * height + off);
1712   for (int i = 0; i < kSrcStride * height; ++i) {
1713     src_argb_a[i + off] = (fastrand() & 0xff);
1714   }
1715   memset(dst_argb_c, 0, kDstStride * height);
1716   memset(dst_argb_opt, 0, kDstStride * height);
1717 
1718   MaskCpuFlags(disable_cpu_flags);
1719   ARGBSobelToPlane(src_argb_a + off, kSrcStride, dst_argb_c, kDstStride, width,
1720                    invert * height);
1721   MaskCpuFlags(benchmark_cpu_info);
1722   for (int i = 0; i < benchmark_iterations; ++i) {
1723     ARGBSobelToPlane(src_argb_a + off, kSrcStride, dst_argb_opt, kDstStride,
1724                      width, invert * height);
1725   }
1726   int max_diff = 0;
1727   for (int i = 0; i < kDstStride * height; ++i) {
1728     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1729                        static_cast<int>(dst_argb_opt[i]));
1730     if (abs_diff > max_diff) {
1731       max_diff = abs_diff;
1732     }
1733   }
1734   free_aligned_buffer_page_end(src_argb_a);
1735   free_aligned_buffer_page_end(dst_argb_c);
1736   free_aligned_buffer_page_end(dst_argb_opt);
1737   return max_diff;
1738 }
1739 
// ARGBSobelToPlane on an image one pixel narrower than the benchmark width;
// the two outputs compared by TestSobelToPlane must match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Any) {
  const int kWidth = benchmark_width_ - 1;
  const int kInvert = +1;  // height is passed through with its sign unchanged
  const int kOff = 0;      // no extra byte offset on the source pointer
  const int max_diff =
      TestSobelToPlane(kWidth, benchmark_height_, benchmark_iterations_,
                       disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
1746 
// ARGBSobelToPlane with the source pointer shifted by one byte; the two
// outputs compared by TestSobelToPlane must match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Unaligned) {
  const int kInvert = +1;  // height is passed through with its sign unchanged
  const int kOff = 1;      // source starts one byte into its buffer
  const int max_diff =
      TestSobelToPlane(benchmark_width_, benchmark_height_,
                       benchmark_iterations_, disable_cpu_flags_,
                       benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
1753 
// ARGBSobelToPlane called with a negated height; the two outputs compared by
// TestSobelToPlane must match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Invert) {
  const int kInvert = -1;  // ARGBSobelToPlane receives -benchmark_height_
  const int kOff = 0;      // no extra byte offset on the source pointer
  const int max_diff =
      TestSobelToPlane(benchmark_width_, benchmark_height_,
                       benchmark_iterations_, disable_cpu_flags_,
                       benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
1760 
// ARGBSobelToPlane at the full benchmark size with no offset and positive
// height; the two outputs compared by TestSobelToPlane must match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Opt) {
  const int kInvert = +1;  // height is passed through with its sign unchanged
  const int kOff = 0;      // no extra byte offset on the source pointer
  const int max_diff =
      TestSobelToPlane(benchmark_width_, benchmark_height_,
                       benchmark_iterations_, disable_cpu_flags_,
                       benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
1767 
TestSobelXY(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1768 static int TestSobelXY(int width,
1769                        int height,
1770                        int benchmark_iterations,
1771                        int disable_cpu_flags,
1772                        int benchmark_cpu_info,
1773                        int invert,
1774                        int off) {
1775   if (width < 1) {
1776     width = 1;
1777   }
1778   const int kBpp = 4;
1779   const int kStride = width * kBpp;
1780   align_buffer_page_end(src_argb_a, kStride * height + off);
1781   align_buffer_page_end(dst_argb_c, kStride * height);
1782   align_buffer_page_end(dst_argb_opt, kStride * height);
1783   memset(src_argb_a, 0, kStride * height + off);
1784   for (int i = 0; i < kStride * height; ++i) {
1785     src_argb_a[i + off] = (fastrand() & 0xff);
1786   }
1787   memset(dst_argb_c, 0, kStride * height);
1788   memset(dst_argb_opt, 0, kStride * height);
1789 
1790   MaskCpuFlags(disable_cpu_flags);
1791   ARGBSobelXY(src_argb_a + off, kStride, dst_argb_c, kStride, width,
1792               invert * height);
1793   MaskCpuFlags(benchmark_cpu_info);
1794   for (int i = 0; i < benchmark_iterations; ++i) {
1795     ARGBSobelXY(src_argb_a + off, kStride, dst_argb_opt, kStride, width,
1796                 invert * height);
1797   }
1798   int max_diff = 0;
1799   for (int i = 0; i < kStride * height; ++i) {
1800     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1801                        static_cast<int>(dst_argb_opt[i]));
1802     if (abs_diff > max_diff) {
1803       max_diff = abs_diff;
1804     }
1805   }
1806   free_aligned_buffer_page_end(src_argb_a);
1807   free_aligned_buffer_page_end(dst_argb_c);
1808   free_aligned_buffer_page_end(dst_argb_opt);
1809   return max_diff;
1810 }
1811 
// ARGBSobelXY on an image one pixel narrower than the benchmark width; the two
// outputs compared by TestSobelXY must match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Any) {
  const int kWidth = benchmark_width_ - 1;
  const int kInvert = +1;  // height is passed through with its sign unchanged
  const int kOff = 0;      // no extra byte offset on the source pointer
  const int max_diff =
      TestSobelXY(kWidth, benchmark_height_, benchmark_iterations_,
                  disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
1818 
// ARGBSobelXY with the source pointer shifted by one byte; the two outputs
// compared by TestSobelXY must match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Unaligned) {
  const int kInvert = +1;  // height is passed through with its sign unchanged
  const int kOff = 1;      // source starts one byte into its buffer
  const int max_diff = TestSobelXY(benchmark_width_, benchmark_height_,
                                   benchmark_iterations_, disable_cpu_flags_,
                                   benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
1825 
// ARGBSobelXY called with a negated height; the two outputs compared by
// TestSobelXY must match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Invert) {
  const int kInvert = -1;  // ARGBSobelXY receives -benchmark_height_
  const int kOff = 0;      // no extra byte offset on the source pointer
  const int max_diff = TestSobelXY(benchmark_width_, benchmark_height_,
                                   benchmark_iterations_, disable_cpu_flags_,
                                   benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
1832 
// ARGBSobelXY at the full benchmark size with no offset and positive height;
// the two outputs compared by TestSobelXY must match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Opt) {
  const int kInvert = +1;  // height is passed through with its sign unchanged
  const int kOff = 0;      // no extra byte offset on the source pointer
  const int max_diff = TestSobelXY(benchmark_width_, benchmark_height_,
                                   benchmark_iterations_, disable_cpu_flags_,
                                   benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
1839 
TestBlur(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off,int radius)1840 static int TestBlur(int width,
1841                     int height,
1842                     int benchmark_iterations,
1843                     int disable_cpu_flags,
1844                     int benchmark_cpu_info,
1845                     int invert,
1846                     int off,
1847                     int radius) {
1848   if (width < 1) {
1849     width = 1;
1850   }
1851   const int kBpp = 4;
1852   const int kStride = width * kBpp;
1853   align_buffer_page_end(src_argb_a, kStride * height + off);
1854   align_buffer_page_end(dst_cumsum, width * height * 16);
1855   align_buffer_page_end(dst_argb_c, kStride * height);
1856   align_buffer_page_end(dst_argb_opt, kStride * height);
1857   for (int i = 0; i < kStride * height; ++i) {
1858     src_argb_a[i + off] = (fastrand() & 0xff);
1859   }
1860   memset(dst_cumsum, 0, width * height * 16);
1861   memset(dst_argb_c, 0, kStride * height);
1862   memset(dst_argb_opt, 0, kStride * height);
1863 
1864   MaskCpuFlags(disable_cpu_flags);
1865   ARGBBlur(src_argb_a + off, kStride, dst_argb_c, kStride,
1866            reinterpret_cast<int32*>(dst_cumsum), width * 4, width,
1867            invert * height, radius);
1868   MaskCpuFlags(benchmark_cpu_info);
1869   for (int i = 0; i < benchmark_iterations; ++i) {
1870     ARGBBlur(src_argb_a + off, kStride, dst_argb_opt, kStride,
1871              reinterpret_cast<int32*>(dst_cumsum), width * 4, width,
1872              invert * height, radius);
1873   }
1874   int max_diff = 0;
1875   for (int i = 0; i < kStride * height; ++i) {
1876     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1877                        static_cast<int>(dst_argb_opt[i]));
1878     if (abs_diff > max_diff) {
1879       max_diff = abs_diff;
1880     }
1881   }
1882   free_aligned_buffer_page_end(src_argb_a);
1883   free_aligned_buffer_page_end(dst_cumsum);
1884   free_aligned_buffer_page_end(dst_argb_c);
1885   free_aligned_buffer_page_end(dst_argb_opt);
1886   return max_diff;
1887 }
1888 
// Radius handed to TestBlur by the ARGBBlur_* tests.
static const int kBlurSize = 55;

// ARGBBlur with radius kBlurSize on an image one pixel narrower than the
// benchmark width; the two outputs compared by TestBlur may differ by at most
// 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBBlur_Any) {
  const int kWidth = benchmark_width_ - 1;
  const int kInvert = +1;  // height is passed through with its sign unchanged
  const int kOff = 0;      // no extra byte offset on the source pointer
  const int max_diff = TestBlur(kWidth, benchmark_height_,
                                benchmark_iterations_, disable_cpu_flags_,
                                benchmark_cpu_info_, kInvert, kOff, kBlurSize);
  EXPECT_LE(max_diff, 1);
}
1896 
// ARGBBlur with radius kBlurSize and the source pointer shifted by one byte;
// the two outputs compared by TestBlur may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBBlur_Unaligned) {
  const int kInvert = +1;  // height is passed through with its sign unchanged
  const int kOff = 1;      // source starts one byte into its buffer
  const int max_diff = TestBlur(benchmark_width_, benchmark_height_,
                                benchmark_iterations_, disable_cpu_flags_,
                                benchmark_cpu_info_, kInvert, kOff, kBlurSize);
  EXPECT_LE(max_diff, 1);
}
1903 
// ARGBBlur with radius kBlurSize and a negated height; the two outputs
// compared by TestBlur may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBBlur_Invert) {
  const int kInvert = -1;  // ARGBBlur receives -benchmark_height_
  const int kOff = 0;      // no extra byte offset on the source pointer
  const int max_diff = TestBlur(benchmark_width_, benchmark_height_,
                                benchmark_iterations_, disable_cpu_flags_,
                                benchmark_cpu_info_, kInvert, kOff, kBlurSize);
  EXPECT_LE(max_diff, 1);
}
1910 
// ARGBBlur with radius kBlurSize at the full benchmark size, no offset and
// positive height; the two outputs compared by TestBlur may differ by at most
// 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBBlur_Opt) {
  const int kInvert = +1;  // height is passed through with its sign unchanged
  const int kOff = 0;      // no extra byte offset on the source pointer
  const int max_diff = TestBlur(benchmark_width_, benchmark_height_,
                                benchmark_iterations_, disable_cpu_flags_,
                                benchmark_cpu_info_, kInvert, kOff, kBlurSize);
  EXPECT_LE(max_diff, 1);
}
1917 
// Radius handed to TestBlur by the ARGBBlurSmall_* tests.
static const int kBlurSmallSize = 5;

// ARGBBlur with radius kBlurSmallSize on an image one pixel narrower than the
// benchmark width; the two outputs compared by TestBlur may differ by at most
// 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Any) {
  const int kWidth = benchmark_width_ - 1;
  const int kInvert = +1;  // height is passed through with its sign unchanged
  const int kOff = 0;      // no extra byte offset on the source pointer
  const int max_diff =
      TestBlur(kWidth, benchmark_height_, benchmark_iterations_,
               disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff,
               kBlurSmallSize);
  EXPECT_LE(max_diff, 1);
}
1925 
// ARGBBlur with radius kBlurSmallSize and the source pointer shifted by one
// byte; the two outputs compared by TestBlur may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Unaligned) {
  const int kInvert = +1;  // height is passed through with its sign unchanged
  const int kOff = 1;      // source starts one byte into its buffer
  const int max_diff =
      TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
               disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff,
               kBlurSmallSize);
  EXPECT_LE(max_diff, 1);
}
1932 
// ARGBBlur with radius kBlurSmallSize and a negated height; the two outputs
// compared by TestBlur may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Invert) {
  const int kInvert = -1;  // ARGBBlur receives -benchmark_height_
  const int kOff = 0;      // no extra byte offset on the source pointer
  const int max_diff =
      TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
               disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff,
               kBlurSmallSize);
  EXPECT_LE(max_diff, 1);
}
1939 
// ARGBBlur with radius kBlurSmallSize at the full benchmark size, no offset
// and positive height; the two outputs compared by TestBlur may differ by at
// most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Opt) {
  const int kInvert = +1;  // height is passed through with its sign unchanged
  const int kOff = 0;      // no extra byte offset on the source pointer
  const int max_diff =
      TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
               disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff,
               kBlurSmallSize);
  EXPECT_LE(max_diff, 1);
}
1946 
TEST_F(LibYUVPlanarTest,TestARGBPolynomial)1947 TEST_F(LibYUVPlanarTest, TestARGBPolynomial) {
1948   SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
1949   SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]);
1950   SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]);
1951   memset(orig_pixels, 0, sizeof(orig_pixels));
1952 
1953   SIMD_ALIGNED(static const float kWarmifyPolynomial[16]) = {
1954       0.94230f,  -3.03300f,    -2.92500f,    0.f,  // C0
1955       0.584500f, 1.112000f,    1.535000f,    1.f,  // C1 x
1956       0.001313f, -0.002503f,   -0.004496f,   0.f,  // C2 x * x
1957       0.0f,      0.000006965f, 0.000008781f, 0.f,  // C3 x * x * x
1958   };
1959 
1960   // Test blue
1961   orig_pixels[0][0] = 255u;
1962   orig_pixels[0][1] = 0u;
1963   orig_pixels[0][2] = 0u;
1964   orig_pixels[0][3] = 128u;
1965   // Test green
1966   orig_pixels[1][0] = 0u;
1967   orig_pixels[1][1] = 255u;
1968   orig_pixels[1][2] = 0u;
1969   orig_pixels[1][3] = 0u;
1970   // Test red
1971   orig_pixels[2][0] = 0u;
1972   orig_pixels[2][1] = 0u;
1973   orig_pixels[2][2] = 255u;
1974   orig_pixels[2][3] = 255u;
1975   // Test white
1976   orig_pixels[3][0] = 255u;
1977   orig_pixels[3][1] = 255u;
1978   orig_pixels[3][2] = 255u;
1979   orig_pixels[3][3] = 255u;
1980   // Test color
1981   orig_pixels[4][0] = 16u;
1982   orig_pixels[4][1] = 64u;
1983   orig_pixels[4][2] = 192u;
1984   orig_pixels[4][3] = 224u;
1985   // Do 16 to test asm version.
1986   ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
1987                  &kWarmifyPolynomial[0], 16, 1);
1988   EXPECT_EQ(235u, dst_pixels_opt[0][0]);
1989   EXPECT_EQ(0u, dst_pixels_opt[0][1]);
1990   EXPECT_EQ(0u, dst_pixels_opt[0][2]);
1991   EXPECT_EQ(128u, dst_pixels_opt[0][3]);
1992   EXPECT_EQ(0u, dst_pixels_opt[1][0]);
1993   EXPECT_EQ(233u, dst_pixels_opt[1][1]);
1994   EXPECT_EQ(0u, dst_pixels_opt[1][2]);
1995   EXPECT_EQ(0u, dst_pixels_opt[1][3]);
1996   EXPECT_EQ(0u, dst_pixels_opt[2][0]);
1997   EXPECT_EQ(0u, dst_pixels_opt[2][1]);
1998   EXPECT_EQ(241u, dst_pixels_opt[2][2]);
1999   EXPECT_EQ(255u, dst_pixels_opt[2][3]);
2000   EXPECT_EQ(235u, dst_pixels_opt[3][0]);
2001   EXPECT_EQ(233u, dst_pixels_opt[3][1]);
2002   EXPECT_EQ(241u, dst_pixels_opt[3][2]);
2003   EXPECT_EQ(255u, dst_pixels_opt[3][3]);
2004   EXPECT_EQ(10u, dst_pixels_opt[4][0]);
2005   EXPECT_EQ(59u, dst_pixels_opt[4][1]);
2006   EXPECT_EQ(188u, dst_pixels_opt[4][2]);
2007   EXPECT_EQ(224u, dst_pixels_opt[4][3]);
2008 
2009   for (int i = 0; i < 1280; ++i) {
2010     orig_pixels[i][0] = i;
2011     orig_pixels[i][1] = i / 2;
2012     orig_pixels[i][2] = i / 3;
2013     orig_pixels[i][3] = i;
2014   }
2015 
2016   MaskCpuFlags(disable_cpu_flags_);
2017   ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0,
2018                  &kWarmifyPolynomial[0], 1280, 1);
2019   MaskCpuFlags(benchmark_cpu_info_);
2020 
2021   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
2022     ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
2023                    &kWarmifyPolynomial[0], 1280, 1);
2024   }
2025 
2026   for (int i = 0; i < 1280; ++i) {
2027     EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]);
2028     EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]);
2029     EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]);
2030     EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]);
2031   }
2032 }
2033 
TestHalfFloatPlane(int benchmark_width,int benchmark_height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,float scale,int mask)2034 int TestHalfFloatPlane(int benchmark_width,
2035                        int benchmark_height,
2036                        int benchmark_iterations,
2037                        int disable_cpu_flags,
2038                        int benchmark_cpu_info,
2039                        float scale,
2040                        int mask) {
2041   int i, j;
2042   const int y_plane_size = benchmark_width * benchmark_height * 2;
2043 
2044   align_buffer_page_end(orig_y, y_plane_size * 3);
2045   uint8* dst_opt = orig_y + y_plane_size;
2046   uint8* dst_c = orig_y + y_plane_size * 2;
2047 
2048   MemRandomize(orig_y, y_plane_size);
2049   memset(dst_c, 0, y_plane_size);
2050   memset(dst_opt, 1, y_plane_size);
2051 
2052   for (i = 0; i < y_plane_size / 2; ++i) {
2053     reinterpret_cast<uint16*>(orig_y)[i] &= mask;
2054   }
2055 
2056   // Disable all optimizations.
2057   MaskCpuFlags(disable_cpu_flags);
2058   for (j = 0; j < benchmark_iterations; j++) {
2059     HalfFloatPlane(reinterpret_cast<uint16*>(orig_y), benchmark_width * 2,
2060                    reinterpret_cast<uint16*>(dst_c), benchmark_width * 2, scale,
2061                    benchmark_width, benchmark_height);
2062   }
2063 
2064   // Enable optimizations.
2065   MaskCpuFlags(benchmark_cpu_info);
2066   for (j = 0; j < benchmark_iterations; j++) {
2067     HalfFloatPlane(reinterpret_cast<uint16*>(orig_y), benchmark_width * 2,
2068                    reinterpret_cast<uint16*>(dst_opt), benchmark_width * 2,
2069                    scale, benchmark_width, benchmark_height);
2070   }
2071 
2072   int max_diff = 0;
2073   for (i = 0; i < y_plane_size / 2; ++i) {
2074     int abs_diff = abs(static_cast<int>(reinterpret_cast<uint16*>(dst_c)[i]) -
2075                        static_cast<int>(reinterpret_cast<uint16*>(dst_opt)[i]));
2076     if (abs_diff > max_diff) {
2077       max_diff = abs_diff;
2078     }
2079   }
2080 
2081   free_aligned_buffer_page_end(orig_y);
2082   return max_diff;
2083 }
2084 
#if defined(__arm__)
// Reads the FPSCR, ORs in 0x1000000 (bit 24, the flush-to-zero control per the
// ARM architecture manual) and writes the value back.  The setting is not
// restored afterwards.
static void EnableFlushDenormalToZero(void) {
  uint32_t fpscr_value;
  __asm__ __volatile__(
      "vmrs   %0, fpscr         \n"
      "orr    %0, %0, #0x1000000        \n"
      "vmsr   fpscr, %0         \n"
      : "=r"(fpscr_value)
      :
      : "memory");
}
#endif  // defined(__arm__)
2095 
// A 5 bit exponent with a bias of 15 will underflow to a denormal if scale
// causes the exponent to be less than 0.  15 - log2(65536) = -1.  This
// shouldn't normally happen since scale is 1/(1<<bits) where bits is 9, 10 or
// 12.
2099 
// Full 16-bit input scaled by 1/65536; the two outputs compared by
// TestHalfFloatPlane must match exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_denormal) {
// On 32 bit arm the denormal case rounds off by 1 compared to C, so
// flush-to-zero is switched on first.
#if defined(__arm__)
  EnableFlushDenormalToZero();
#endif
  const float kScale = 1.0f / 65536.0f;
  const int kMask = 65535;
  const int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
                                      benchmark_iterations_, disable_cpu_flags_,
                                      benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, diff);
}
2110 
// Full 16-bit input with a scale of 1; the two outputs compared by
// TestHalfFloatPlane may differ by at most 1.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_One) {
  const float kScale = 1.0f;
  const int kMask = 65535;
  const int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
                                      benchmark_iterations_, disable_cpu_flags_,
                                      benchmark_cpu_info_, kScale, kMask);
  EXPECT_LE(diff, 1);
}
2117 
// Full 16-bit input scaled by 1/4096; the two outputs compared by
// TestHalfFloatPlane must match exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_Opt) {
  const float kScale = 1.0f / 4096.0f;
  const int kMask = 65535;
  const int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
                                      benchmark_iterations_, disable_cpu_flags_,
                                      benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, diff);
}
2124 
// Input masked to 10 bits (1023) and scaled by 1/1024; the two outputs
// compared by TestHalfFloatPlane must match exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_Opt) {
  const float kScale = 1.0f / 1024.0f;
  const int kMask = 1023;
  const int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
                                      benchmark_iterations_, disable_cpu_flags_,
                                      benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, diff);
}
2131 
// Input masked to 9 bits (511) and scaled by 1/512; the two outputs compared
// by TestHalfFloatPlane must match exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_9bit_Opt) {
  const float kScale = 1.0f / 512.0f;
  const int kMask = 511;
  const int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
                                      benchmark_iterations_, disable_cpu_flags_,
                                      benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, diff);
}
2138 
// Input masked to 12 bits (4095) and scaled by 1/4096; the two outputs
// compared by TestHalfFloatPlane must match exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Opt) {
  const float kScale = 1.0f / 4096.0f;
  const int kMask = 4095;
  const int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
                                      benchmark_iterations_, disable_cpu_flags_,
                                      benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, diff);
}
2145 
// Input masked to 12 bits (4095) and scaled by 1/4095 rather than 1/4096; the
// two outputs compared by TestHalfFloatPlane must match exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Offby1) {
  const float kScale = 1.0f / 4095.0f;
  const int kMask = 4095;
  const int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
                                      benchmark_iterations_, disable_cpu_flags_,
                                      benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, diff);
}
2152 
// Input masked to 11 bits (2047) with a scale of 1; the two outputs compared
// by TestHalfFloatPlane must match exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_One) {
  const float kScale = 1.0f;
  const int kMask = 2047;
  const int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
                                      benchmark_iterations_, disable_cpu_flags_,
                                      benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, diff);
}
2159 
// Input masked to 12 bits (4095) with a scale of 1; the two outputs compared
// by TestHalfFloatPlane may differ by at most 1.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_12bit_One) {
  const float kScale = 1.0f;
  const int kMask = 4095;
  const int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
                                      benchmark_iterations_, disable_cpu_flags_,
                                      benchmark_cpu_info_, kScale, kMask);
  EXPECT_LE(diff, 1);
}
2166 
TEST_F(LibYUVPlanarTest,TestARGBLumaColorTable)2167 TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) {
2168   SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
2169   SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]);
2170   SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]);
2171   memset(orig_pixels, 0, sizeof(orig_pixels));
2172 
2173   align_buffer_page_end(lumacolortable, 32768);
2174   int v = 0;
2175   for (int i = 0; i < 32768; ++i) {
2176     lumacolortable[i] = v;
2177     v += 3;
2178   }
2179   // Test blue
2180   orig_pixels[0][0] = 255u;
2181   orig_pixels[0][1] = 0u;
2182   orig_pixels[0][2] = 0u;
2183   orig_pixels[0][3] = 128u;
2184   // Test green
2185   orig_pixels[1][0] = 0u;
2186   orig_pixels[1][1] = 255u;
2187   orig_pixels[1][2] = 0u;
2188   orig_pixels[1][3] = 0u;
2189   // Test red
2190   orig_pixels[2][0] = 0u;
2191   orig_pixels[2][1] = 0u;
2192   orig_pixels[2][2] = 255u;
2193   orig_pixels[2][3] = 255u;
2194   // Test color
2195   orig_pixels[3][0] = 16u;
2196   orig_pixels[3][1] = 64u;
2197   orig_pixels[3][2] = 192u;
2198   orig_pixels[3][3] = 224u;
2199   // Do 16 to test asm version.
2200   ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
2201                      &lumacolortable[0], 16, 1);
2202   EXPECT_EQ(253u, dst_pixels_opt[0][0]);
2203   EXPECT_EQ(0u, dst_pixels_opt[0][1]);
2204   EXPECT_EQ(0u, dst_pixels_opt[0][2]);
2205   EXPECT_EQ(128u, dst_pixels_opt[0][3]);
2206   EXPECT_EQ(0u, dst_pixels_opt[1][0]);
2207   EXPECT_EQ(253u, dst_pixels_opt[1][1]);
2208   EXPECT_EQ(0u, dst_pixels_opt[1][2]);
2209   EXPECT_EQ(0u, dst_pixels_opt[1][3]);
2210   EXPECT_EQ(0u, dst_pixels_opt[2][0]);
2211   EXPECT_EQ(0u, dst_pixels_opt[2][1]);
2212   EXPECT_EQ(253u, dst_pixels_opt[2][2]);
2213   EXPECT_EQ(255u, dst_pixels_opt[2][3]);
2214   EXPECT_EQ(48u, dst_pixels_opt[3][0]);
2215   EXPECT_EQ(192u, dst_pixels_opt[3][1]);
2216   EXPECT_EQ(64u, dst_pixels_opt[3][2]);
2217   EXPECT_EQ(224u, dst_pixels_opt[3][3]);
2218 
2219   for (int i = 0; i < 1280; ++i) {
2220     orig_pixels[i][0] = i;
2221     orig_pixels[i][1] = i / 2;
2222     orig_pixels[i][2] = i / 3;
2223     orig_pixels[i][3] = i;
2224   }
2225 
2226   MaskCpuFlags(disable_cpu_flags_);
2227   ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0,
2228                      lumacolortable, 1280, 1);
2229   MaskCpuFlags(benchmark_cpu_info_);
2230 
2231   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
2232     ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
2233                        lumacolortable, 1280, 1);
2234   }
2235   for (int i = 0; i < 1280; ++i) {
2236     EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]);
2237     EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]);
2238     EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]);
2239     EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]);
2240   }
2241 
2242   free_aligned_buffer_page_end(lumacolortable);
2243 }
2244 
// Runs ARGBCopyAlpha from a random source into two destinations that start
// with identical random contents, once under disable_cpu_flags_ and
// benchmark_iterations_ times under benchmark_cpu_info_, then requires the two
// destinations to match byte for byte.
TEST_F(LibYUVPlanarTest, TestARGBCopyAlpha) {
  const int kStride = benchmark_width_ * 4;
  const int kSize = benchmark_width_ * benchmark_height_ * 4;
  align_buffer_page_end(orig_pixels, kSize);
  align_buffer_page_end(dst_pixels_opt, kSize);
  align_buffer_page_end(dst_pixels_c, kSize);

  MemRandomize(orig_pixels, kSize);
  MemRandomize(dst_pixels_opt, kSize);
  // Give both destinations the same starting bytes so anything ARGBCopyAlpha
  // leaves untouched still compares equal.
  memcpy(dst_pixels_c, dst_pixels_opt, kSize);

  // Single pass under disable_cpu_flags_.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBCopyAlpha(orig_pixels, kStride, dst_pixels_c, kStride, benchmark_width_,
                benchmark_height_);

  // Repeated passes under benchmark_cpu_info_.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    ARGBCopyAlpha(orig_pixels, kStride, dst_pixels_opt, kStride,
                  benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < kSize; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(dst_pixels_c);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(orig_pixels);
}
2272 
// Runs ARGBExtractAlpha from a random 4-byte-per-pixel source into two
// 1-byte-per-pixel destinations that start with identical random contents,
// once under disable_cpu_flags_ and benchmark_iterations_ times under
// benchmark_cpu_info_, then requires the two destinations to match byte for
// byte.
TEST_F(LibYUVPlanarTest, TestARGBExtractAlpha) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kSrcStride = benchmark_width_ * 4;
  const int kDstStride = benchmark_width_;
  align_buffer_page_end(src_pixels, kPixels * 4);
  align_buffer_page_end(dst_pixels_opt, kPixels);
  align_buffer_page_end(dst_pixels_c, kPixels);

  MemRandomize(src_pixels, kPixels * 4);
  MemRandomize(dst_pixels_opt, kPixels);
  // Give both destinations the same starting bytes.
  memcpy(dst_pixels_c, dst_pixels_opt, kPixels);

  // Single pass under disable_cpu_flags_.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBExtractAlpha(src_pixels, kSrcStride, dst_pixels_c, kDstStride,
                   benchmark_width_, benchmark_height_);

  // Repeated passes under benchmark_cpu_info_.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    ARGBExtractAlpha(src_pixels, kSrcStride, dst_pixels_opt, kDstStride,
                     benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < kPixels; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(dst_pixels_c);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(src_pixels);
}
2300 
// Runs ARGBCopyYToAlpha once with disable_cpu_flags_ masked in and
// benchmark_iterations_ times with benchmark_cpu_info_ masked in, then
// requires the two 4-byte-per-pixel outputs to be identical.
TEST_F(LibYUVPlanarTest, TestARGBCopyYToAlpha) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kDstSize = kPixels * 4;
  const int kSrcStride = benchmark_width_;
  const int kDstStride = benchmark_width_ * 4;
  align_buffer_page_end(src_y, kPixels);
  align_buffer_page_end(dst_pixels_opt, kDstSize);
  align_buffer_page_end(dst_pixels_c, kDstSize);

  // Seed both destinations with the same random bytes, so bytes a call does
  // not write still compare equal afterwards.
  MemRandomize(src_y, kPixels);
  MemRandomize(dst_pixels_opt, kDstSize);
  memcpy(dst_pixels_c, dst_pixels_opt, kDstSize);

  // Single pass under the disable_cpu_flags_ mask.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBCopyYToAlpha(src_y, kSrcStride, dst_pixels_c, kDstStride,
                   benchmark_width_, benchmark_height_);

  // Repeated passes under the benchmark_cpu_info_ mask.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int pass = benchmark_iterations_; pass > 0; --pass) {
    ARGBCopyYToAlpha(src_y, kSrcStride, dst_pixels_opt, kDstStride,
                     benchmark_width_, benchmark_height_);
  }

  int i = 0;
  while (i < kPixels * 4) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
    ++i;
  }

  free_aligned_buffer_page_end(src_y);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
2328 
TestARGBRect(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off,int bpp)2329 static int TestARGBRect(int width,
2330                         int height,
2331                         int benchmark_iterations,
2332                         int disable_cpu_flags,
2333                         int benchmark_cpu_info,
2334                         int invert,
2335                         int off,
2336                         int bpp) {
2337   if (width < 1) {
2338     width = 1;
2339   }
2340   const int kStride = width * bpp;
2341   const int kSize = kStride * height;
2342   const uint32 v32 = fastrand() & (bpp == 4 ? 0xffffffff : 0xff);
2343 
2344   align_buffer_page_end(dst_argb_c, kSize + off);
2345   align_buffer_page_end(dst_argb_opt, kSize + off);
2346 
2347   MemRandomize(dst_argb_c + off, kSize);
2348   memcpy(dst_argb_opt + off, dst_argb_c + off, kSize);
2349 
2350   MaskCpuFlags(disable_cpu_flags);
2351   if (bpp == 4) {
2352     ARGBRect(dst_argb_c + off, kStride, 0, 0, width, invert * height, v32);
2353   } else {
2354     SetPlane(dst_argb_c + off, kStride, width, invert * height, v32);
2355   }
2356 
2357   MaskCpuFlags(benchmark_cpu_info);
2358   for (int i = 0; i < benchmark_iterations; ++i) {
2359     if (bpp == 4) {
2360       ARGBRect(dst_argb_opt + off, kStride, 0, 0, width, invert * height, v32);
2361     } else {
2362       SetPlane(dst_argb_opt + off, kStride, width, invert * height, v32);
2363     }
2364   }
2365   int max_diff = 0;
2366   for (int i = 0; i < kStride * height; ++i) {
2367     int abs_diff = abs(static_cast<int>(dst_argb_c[i + off]) -
2368                        static_cast<int>(dst_argb_opt[i + off]));
2369     if (abs_diff > max_diff) {
2370       max_diff = abs_diff;
2371     }
2372   }
2373   free_aligned_buffer_page_end(dst_argb_c);
2374   free_aligned_buffer_page_end(dst_argb_opt);
2375   return max_diff;
2376 }
2377 
// ARGBRect at width benchmark_width_ - 1: 4 bytes per pixel, no buffer
// offset, positive height. The two fills must match exactly.
TEST_F(LibYUVPlanarTest, ARGBRect_Any) {
  const int kInvert = +1;
  const int kOff = 0;
  const int kBpp = 4;
  const int max_diff = TestARGBRect(
      benchmark_width_ - 1, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff, kBpp);
  EXPECT_EQ(0, max_diff);
}
2384 
// ARGBRect with the buffer start shifted by one byte: 4 bytes per pixel,
// positive height. The two fills must match exactly.
TEST_F(LibYUVPlanarTest, ARGBRect_Unaligned) {
  const int kInvert = +1;
  const int kOff = 1;
  const int kBpp = 4;
  const int max_diff = TestARGBRect(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff, kBpp);
  EXPECT_EQ(0, max_diff);
}
2391 
// ARGBRect with invert = -1, so the fill call receives a negative height:
// 4 bytes per pixel, no buffer offset. The two fills must match exactly.
TEST_F(LibYUVPlanarTest, ARGBRect_Invert) {
  const int kInvert = -1;
  const int kOff = 0;
  const int kBpp = 4;
  const int max_diff = TestARGBRect(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff, kBpp);
  EXPECT_EQ(0, max_diff);
}
2398 
// ARGBRect at the full benchmark dimensions: 4 bytes per pixel, no buffer
// offset, positive height. The two fills must match exactly.
TEST_F(LibYUVPlanarTest, ARGBRect_Opt) {
  const int kInvert = +1;
  const int kOff = 0;
  const int kBpp = 4;
  const int max_diff = TestARGBRect(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff, kBpp);
  EXPECT_EQ(0, max_diff);
}
2405 
// SetPlane (bpp 1 in TestARGBRect) at width benchmark_width_ - 1: no buffer
// offset, positive height. The two fills must match exactly.
TEST_F(LibYUVPlanarTest, SetPlane_Any) {
  const int kInvert = +1;
  const int kOff = 0;
  const int kBpp = 1;
  const int max_diff = TestARGBRect(
      benchmark_width_ - 1, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff, kBpp);
  EXPECT_EQ(0, max_diff);
}
2412 
// SetPlane (bpp 1 in TestARGBRect) with the buffer start shifted by one
// byte and positive height. The two fills must match exactly.
TEST_F(LibYUVPlanarTest, SetPlane_Unaligned) {
  const int kInvert = +1;
  const int kOff = 1;
  const int kBpp = 1;
  const int max_diff = TestARGBRect(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff, kBpp);
  EXPECT_EQ(0, max_diff);
}
2419 
// SetPlane (bpp 1 in TestARGBRect) with invert = -1, so the fill call
// receives a negative height; no buffer offset. The two fills must match
// exactly.
TEST_F(LibYUVPlanarTest, SetPlane_Invert) {
  const int kInvert = -1;
  const int kOff = 0;
  const int kBpp = 1;
  const int max_diff = TestARGBRect(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff, kBpp);
  EXPECT_EQ(0, max_diff);
}
2426 
// SetPlane (bpp 1 in TestARGBRect) at the full benchmark dimensions: no
// buffer offset, positive height. The two fills must match exactly.
TEST_F(LibYUVPlanarTest, SetPlane_Opt) {
  const int kInvert = +1;
  const int kOff = 0;
  const int kBpp = 1;
  const int max_diff = TestARGBRect(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff, kBpp);
  EXPECT_EQ(0, max_diff);
}
2433 
// Splits a random interleaved buffer into two planes and merges them back,
// once with disable_cpu_flags_ masked in and once with benchmark_cpu_info_
// masked in. Only the second MergeUVPlane is repeated benchmark_iterations_
// times. The two merged outputs must be byte-identical.
TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kUVSize = kPixels * 2;
  const int kUVStride = benchmark_width_ * 2;
  const int kPlaneStride = benchmark_width_;
  align_buffer_page_end(src_uv, kUVSize);
  align_buffer_page_end(plane_u, kPixels);
  align_buffer_page_end(plane_v, kPixels);
  align_buffer_page_end(dst_pixels_opt, kUVSize);
  align_buffer_page_end(dst_pixels_c, kUVSize);

  MemRandomize(src_uv, kUVSize);
  MemRandomize(plane_u, kPixels);
  MemRandomize(plane_v, kPixels);
  MemRandomize(dst_pixels_opt, kUVSize);
  MemRandomize(dst_pixels_c, kUVSize);

  // Split + merge round trip under the disable_cpu_flags_ mask.
  MaskCpuFlags(disable_cpu_flags_);
  SplitUVPlane(src_uv, kUVStride, plane_u, kPlaneStride, plane_v,
               kPlaneStride, benchmark_width_, benchmark_height_);
  MergeUVPlane(plane_u, kPlaneStride, plane_v, kPlaneStride, dst_pixels_c,
               kUVStride, benchmark_width_, benchmark_height_);

  // Under the benchmark_cpu_info_ mask: split once, then merge repeatedly.
  MaskCpuFlags(benchmark_cpu_info_);
  SplitUVPlane(src_uv, kUVStride, plane_u, kPlaneStride, plane_v,
               kPlaneStride, benchmark_width_, benchmark_height_);
  for (int pass = benchmark_iterations_; pass > 0; --pass) {
    MergeUVPlane(plane_u, kPlaneStride, plane_v, kPlaneStride,
                 dst_pixels_opt, kUVStride, benchmark_width_,
                 benchmark_height_);
  }

  int i = 0;
  while (i < kPixels * 2) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
    ++i;
  }

  free_aligned_buffer_page_end(dst_pixels_c);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(plane_v);
  free_aligned_buffer_page_end(plane_u);
  free_aligned_buffer_page_end(src_uv);
}
2477 
// Splits a random interleaved buffer into two planes and merges them back,
// once with disable_cpu_flags_ masked in and once with benchmark_cpu_info_
// masked in. Only the second SplitUVPlane is repeated benchmark_iterations_
// times. The two merged outputs must be byte-identical.
TEST_F(LibYUVPlanarTest, SplitUVPlane_Opt) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kUVSize = kPixels * 2;
  const int kUVStride = benchmark_width_ * 2;
  const int kPlaneStride = benchmark_width_;
  align_buffer_page_end(src_uv, kUVSize);
  align_buffer_page_end(plane_u, kPixels);
  align_buffer_page_end(plane_v, kPixels);
  align_buffer_page_end(dst_pixels_opt, kUVSize);
  align_buffer_page_end(dst_pixels_c, kUVSize);

  MemRandomize(src_uv, kUVSize);
  MemRandomize(plane_u, kPixels);
  MemRandomize(plane_v, kPixels);
  MemRandomize(dst_pixels_opt, kUVSize);
  MemRandomize(dst_pixels_c, kUVSize);

  // Split + merge round trip under the disable_cpu_flags_ mask.
  MaskCpuFlags(disable_cpu_flags_);
  SplitUVPlane(src_uv, kUVStride, plane_u, kPlaneStride, plane_v,
               kPlaneStride, benchmark_width_, benchmark_height_);
  MergeUVPlane(plane_u, kPlaneStride, plane_v, kPlaneStride, dst_pixels_c,
               kUVStride, benchmark_width_, benchmark_height_);

  // Under the benchmark_cpu_info_ mask: split repeatedly, then merge once.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int pass = benchmark_iterations_; pass > 0; --pass) {
    SplitUVPlane(src_uv, kUVStride, plane_u, kPlaneStride, plane_v,
                 kPlaneStride, benchmark_width_, benchmark_height_);
  }
  MergeUVPlane(plane_u, kPlaneStride, plane_v, kPlaneStride, dst_pixels_opt,
               kUVStride, benchmark_width_, benchmark_height_);

  int i = 0;
  while (i < kPixels * 2) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
    ++i;
  }

  free_aligned_buffer_page_end(dst_pixels_c);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(plane_v);
  free_aligned_buffer_page_end(plane_u);
  free_aligned_buffer_page_end(src_uv);
}
2520 
2521 }  // namespace libyuv
2522