• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS. All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <math.h>
12 #include <stdlib.h>
13 #include <time.h>
14 
15 // row.h defines SIMD_ALIGNED, overriding unit_test.h
16 #include "libyuv/row.h" /* For ScaleSumSamples_Neon */
17 
18 #include "../unit_test/unit_test.h"
19 #include "libyuv/compare.h"
20 #include "libyuv/convert.h"
21 #include "libyuv/convert_argb.h"
22 #include "libyuv/convert_from.h"
23 #include "libyuv/convert_from_argb.h"
24 #include "libyuv/cpu_id.h"
25 #include "libyuv/planar_functions.h"
26 #include "libyuv/rotate.h"
27 
28 namespace libyuv {
29 
TEST_F(LibYUVPlanarTest,TestAttenuate)30 TEST_F(LibYUVPlanarTest, TestAttenuate) {
31   const int kSize = 1280 * 4;
32   align_buffer_page_end(orig_pixels, kSize);
33   align_buffer_page_end(atten_pixels, kSize);
34   align_buffer_page_end(unatten_pixels, kSize);
35   align_buffer_page_end(atten2_pixels, kSize);
36 
37   // Test unattenuation clamps
38   orig_pixels[0 * 4 + 0] = 200u;
39   orig_pixels[0 * 4 + 1] = 129u;
40   orig_pixels[0 * 4 + 2] = 127u;
41   orig_pixels[0 * 4 + 3] = 128u;
42   // Test unattenuation transparent and opaque are unaffected
43   orig_pixels[1 * 4 + 0] = 16u;
44   orig_pixels[1 * 4 + 1] = 64u;
45   orig_pixels[1 * 4 + 2] = 192u;
46   orig_pixels[1 * 4 + 3] = 0u;
47   orig_pixels[2 * 4 + 0] = 16u;
48   orig_pixels[2 * 4 + 1] = 64u;
49   orig_pixels[2 * 4 + 2] = 192u;
50   orig_pixels[2 * 4 + 3] = 255u;
51   orig_pixels[3 * 4 + 0] = 16u;
52   orig_pixels[3 * 4 + 1] = 64u;
53   orig_pixels[3 * 4 + 2] = 192u;
54   orig_pixels[3 * 4 + 3] = 128u;
55   ARGBUnattenuate(orig_pixels, 0, unatten_pixels, 0, 4, 1);
56   EXPECT_EQ(255u, unatten_pixels[0 * 4 + 0]);
57   EXPECT_EQ(255u, unatten_pixels[0 * 4 + 1]);
58   EXPECT_EQ(254u, unatten_pixels[0 * 4 + 2]);
59   EXPECT_EQ(128u, unatten_pixels[0 * 4 + 3]);
60   EXPECT_EQ(0u, unatten_pixels[1 * 4 + 0]);
61   EXPECT_EQ(0u, unatten_pixels[1 * 4 + 1]);
62   EXPECT_EQ(0u, unatten_pixels[1 * 4 + 2]);
63   EXPECT_EQ(0u, unatten_pixels[1 * 4 + 3]);
64   EXPECT_EQ(16u, unatten_pixels[2 * 4 + 0]);
65   EXPECT_EQ(64u, unatten_pixels[2 * 4 + 1]);
66   EXPECT_EQ(192u, unatten_pixels[2 * 4 + 2]);
67   EXPECT_EQ(255u, unatten_pixels[2 * 4 + 3]);
68   EXPECT_EQ(32u, unatten_pixels[3 * 4 + 0]);
69   EXPECT_EQ(128u, unatten_pixels[3 * 4 + 1]);
70   EXPECT_EQ(255u, unatten_pixels[3 * 4 + 2]);
71   EXPECT_EQ(128u, unatten_pixels[3 * 4 + 3]);
72 
73   for (int i = 0; i < 1280; ++i) {
74     orig_pixels[i * 4 + 0] = i;
75     orig_pixels[i * 4 + 1] = i / 2;
76     orig_pixels[i * 4 + 2] = i / 3;
77     orig_pixels[i * 4 + 3] = i;
78   }
79   ARGBAttenuate(orig_pixels, 0, atten_pixels, 0, 1280, 1);
80   ARGBUnattenuate(atten_pixels, 0, unatten_pixels, 0, 1280, 1);
81   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
82     ARGBAttenuate(unatten_pixels, 0, atten2_pixels, 0, 1280, 1);
83   }
84   for (int i = 0; i < 1280; ++i) {
85     EXPECT_NEAR(atten_pixels[i * 4 + 0], atten2_pixels[i * 4 + 0], 2);
86     EXPECT_NEAR(atten_pixels[i * 4 + 1], atten2_pixels[i * 4 + 1], 2);
87     EXPECT_NEAR(atten_pixels[i * 4 + 2], atten2_pixels[i * 4 + 2], 2);
88     EXPECT_NEAR(atten_pixels[i * 4 + 3], atten2_pixels[i * 4 + 3], 2);
89   }
90   // Make sure transparent, 50% and opaque are fully accurate.
91   EXPECT_EQ(0, atten_pixels[0 * 4 + 0]);
92   EXPECT_EQ(0, atten_pixels[0 * 4 + 1]);
93   EXPECT_EQ(0, atten_pixels[0 * 4 + 2]);
94   EXPECT_EQ(0, atten_pixels[0 * 4 + 3]);
95   EXPECT_EQ(64, atten_pixels[128 * 4 + 0]);
96   EXPECT_EQ(32, atten_pixels[128 * 4 + 1]);
97   EXPECT_EQ(21, atten_pixels[128 * 4 + 2]);
98   EXPECT_EQ(128, atten_pixels[128 * 4 + 3]);
99   EXPECT_NEAR(255, atten_pixels[255 * 4 + 0], 1);
100   EXPECT_NEAR(127, atten_pixels[255 * 4 + 1], 1);
101   EXPECT_NEAR(85, atten_pixels[255 * 4 + 2], 1);
102   EXPECT_EQ(255, atten_pixels[255 * 4 + 3]);
103 
104   free_aligned_buffer_page_end(atten2_pixels);
105   free_aligned_buffer_page_end(unatten_pixels);
106   free_aligned_buffer_page_end(atten_pixels);
107   free_aligned_buffer_page_end(orig_pixels);
108 }
109 
TestAttenuateI(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)110 static int TestAttenuateI(int width,
111                           int height,
112                           int benchmark_iterations,
113                           int disable_cpu_flags,
114                           int benchmark_cpu_info,
115                           int invert,
116                           int off) {
117   if (width < 1) {
118     width = 1;
119   }
120   const int kBpp = 4;
121   const int kStride = width * kBpp;
122   align_buffer_page_end(src_argb, kStride * height + off);
123   align_buffer_page_end(dst_argb_c, kStride * height);
124   align_buffer_page_end(dst_argb_opt, kStride * height);
125   for (int i = 0; i < kStride * height; ++i) {
126     src_argb[i + off] = (fastrand() & 0xff);
127   }
128   memset(dst_argb_c, 0, kStride * height);
129   memset(dst_argb_opt, 0, kStride * height);
130 
131   MaskCpuFlags(disable_cpu_flags);
132   ARGBAttenuate(src_argb + off, kStride, dst_argb_c, kStride, width,
133                 invert * height);
134   MaskCpuFlags(benchmark_cpu_info);
135   for (int i = 0; i < benchmark_iterations; ++i) {
136     ARGBAttenuate(src_argb + off, kStride, dst_argb_opt, kStride, width,
137                   invert * height);
138   }
139   int max_diff = 0;
140   for (int i = 0; i < kStride * height; ++i) {
141     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
142                        static_cast<int>(dst_argb_opt[i]));
143     if (abs_diff > max_diff) {
144       max_diff = abs_diff;
145     }
146   }
147   free_aligned_buffer_page_end(src_argb);
148   free_aligned_buffer_page_end(dst_argb_c);
149   free_aligned_buffer_page_end(dst_argb_opt);
150   return max_diff;
151 }
152 
// ARGBAttenuate with a width of benchmark_width_ - 1 and no source offset;
// the difference reported by TestAttenuateI must not exceed 2.
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Any) {
  const int kWidth = benchmark_width_ - 1;
  EXPECT_LE(TestAttenuateI(kWidth, benchmark_height_, benchmark_iterations_,
                           disable_cpu_flags_, benchmark_cpu_info_, +1, 0),
            2);
}
159 
// ARGBAttenuate with the source buffer offset by 1 byte; the difference
// reported by TestAttenuateI must not exceed 2.
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Unaligned) {
  const int kSourceOffset = 1;
  EXPECT_LE(TestAttenuateI(benchmark_width_, benchmark_height_,
                           benchmark_iterations_, disable_cpu_flags_,
                           benchmark_cpu_info_, +1, kSourceOffset),
            2);
}
166 
// ARGBAttenuate with invert = -1, so a negated height is passed down; the
// difference reported by TestAttenuateI must not exceed 2.
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Invert) {
  const int kInvert = -1;
  EXPECT_LE(TestAttenuateI(benchmark_width_, benchmark_height_,
                           benchmark_iterations_, disable_cpu_flags_,
                           benchmark_cpu_info_, kInvert, 0),
            2);
}
173 
// ARGBAttenuate at the full benchmark width with no offset and no inversion;
// the difference reported by TestAttenuateI must not exceed 2.
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Opt) {
  EXPECT_LE(TestAttenuateI(benchmark_width_, benchmark_height_,
                           benchmark_iterations_, disable_cpu_flags_,
                           benchmark_cpu_info_, +1, 0),
            2);
}
180 
TestUnattenuateI(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)181 static int TestUnattenuateI(int width,
182                             int height,
183                             int benchmark_iterations,
184                             int disable_cpu_flags,
185                             int benchmark_cpu_info,
186                             int invert,
187                             int off) {
188   if (width < 1) {
189     width = 1;
190   }
191   const int kBpp = 4;
192   const int kStride = width * kBpp;
193   align_buffer_page_end(src_argb, kStride * height + off);
194   align_buffer_page_end(dst_argb_c, kStride * height);
195   align_buffer_page_end(dst_argb_opt, kStride * height);
196   for (int i = 0; i < kStride * height; ++i) {
197     src_argb[i + off] = (fastrand() & 0xff);
198   }
199   ARGBAttenuate(src_argb + off, kStride, src_argb + off, kStride, width,
200                 height);
201   memset(dst_argb_c, 0, kStride * height);
202   memset(dst_argb_opt, 0, kStride * height);
203 
204   MaskCpuFlags(disable_cpu_flags);
205   ARGBUnattenuate(src_argb + off, kStride, dst_argb_c, kStride, width,
206                   invert * height);
207   MaskCpuFlags(benchmark_cpu_info);
208   for (int i = 0; i < benchmark_iterations; ++i) {
209     ARGBUnattenuate(src_argb + off, kStride, dst_argb_opt, kStride, width,
210                     invert * height);
211   }
212   int max_diff = 0;
213   for (int i = 0; i < kStride * height; ++i) {
214     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
215                        static_cast<int>(dst_argb_opt[i]));
216     if (abs_diff > max_diff) {
217       max_diff = abs_diff;
218     }
219   }
220   free_aligned_buffer_page_end(src_argb);
221   free_aligned_buffer_page_end(dst_argb_c);
222   free_aligned_buffer_page_end(dst_argb_opt);
223   return max_diff;
224 }
225 
// ARGBUnattenuate with a width of benchmark_width_ - 1 and no source offset;
// the difference reported by TestUnattenuateI must not exceed 2.
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Any) {
  const int kWidth = benchmark_width_ - 1;
  EXPECT_LE(TestUnattenuateI(kWidth, benchmark_height_, benchmark_iterations_,
                             disable_cpu_flags_, benchmark_cpu_info_, +1, 0),
            2);
}
232 
// ARGBUnattenuate with the source buffer offset by 1 byte; the difference
// reported by TestUnattenuateI must not exceed 2.
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Unaligned) {
  const int kSourceOffset = 1;
  EXPECT_LE(TestUnattenuateI(benchmark_width_, benchmark_height_,
                             benchmark_iterations_, disable_cpu_flags_,
                             benchmark_cpu_info_, +1, kSourceOffset),
            2);
}
239 
// ARGBUnattenuate with invert = -1, so a negated height is passed down; the
// difference reported by TestUnattenuateI must not exceed 2.
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Invert) {
  const int kInvert = -1;
  EXPECT_LE(TestUnattenuateI(benchmark_width_, benchmark_height_,
                             benchmark_iterations_, disable_cpu_flags_,
                             benchmark_cpu_info_, kInvert, 0),
            2);
}
246 
// ARGBUnattenuate at the full benchmark width with no offset and no
// inversion; the difference reported by TestUnattenuateI must not exceed 2.
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Opt) {
  EXPECT_LE(TestUnattenuateI(benchmark_width_, benchmark_height_,
                             benchmark_iterations_, disable_cpu_flags_,
                             benchmark_cpu_info_, +1, 0),
            2);
}
253 
// Feeds a 16x16 image of identical pixels (1, 2, 3, 255) to
// ARGBComputeCumulativeSum and checks that every output entry equals the
// channel value times (x + 1) * (y + 1).
TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) {
  SIMD_ALIGNED(uint8_t orig_pixels[16][16][4]);
  SIMD_ALIGNED(int32_t added_pixels[16][16][4]);

  // Constant value stored in each of the 4 channels of every pixel.
  static const int kChannelValue[4] = {1, 2, 3, 255};

  for (int row = 0; row < 16; ++row) {
    for (int col = 0; col < 16; ++col) {
      for (int ch = 0; ch < 4; ++ch) {
        orig_pixels[row][col][ch] = kChannelValue[ch];
      }
    }
  }

  ARGBComputeCumulativeSum(&orig_pixels[0][0][0], 16 * 4,
                           &added_pixels[0][0][0], 16 * 4, 16, 16);

  for (int row = 0; row < 16; ++row) {
    for (int col = 0; col < 16; ++col) {
      // Number of pixels in the rectangle from (0, 0) to (col, row) inclusive.
      const int area = (col + 1) * (row + 1);
      for (int ch = 0; ch < 4; ++ch) {
        EXPECT_EQ(area * kChannelValue[ch], added_pixels[row][col][ch]);
      }
    }
  }
}
279 
TEST_F(LibYUVPlanarTest,TestARGBGray)280 TEST_F(LibYUVPlanarTest, TestARGBGray) {
281   SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
282   memset(orig_pixels, 0, sizeof(orig_pixels));
283 
284   // Test blue
285   orig_pixels[0][0] = 255u;
286   orig_pixels[0][1] = 0u;
287   orig_pixels[0][2] = 0u;
288   orig_pixels[0][3] = 128u;
289   // Test green
290   orig_pixels[1][0] = 0u;
291   orig_pixels[1][1] = 255u;
292   orig_pixels[1][2] = 0u;
293   orig_pixels[1][3] = 0u;
294   // Test red
295   orig_pixels[2][0] = 0u;
296   orig_pixels[2][1] = 0u;
297   orig_pixels[2][2] = 255u;
298   orig_pixels[2][3] = 255u;
299   // Test black
300   orig_pixels[3][0] = 0u;
301   orig_pixels[3][1] = 0u;
302   orig_pixels[3][2] = 0u;
303   orig_pixels[3][3] = 255u;
304   // Test white
305   orig_pixels[4][0] = 255u;
306   orig_pixels[4][1] = 255u;
307   orig_pixels[4][2] = 255u;
308   orig_pixels[4][3] = 255u;
309   // Test color
310   orig_pixels[5][0] = 16u;
311   orig_pixels[5][1] = 64u;
312   orig_pixels[5][2] = 192u;
313   orig_pixels[5][3] = 224u;
314   // Do 16 to test asm version.
315   ARGBGray(&orig_pixels[0][0], 0, 0, 0, 16, 1);
316   EXPECT_EQ(30u, orig_pixels[0][0]);
317   EXPECT_EQ(30u, orig_pixels[0][1]);
318   EXPECT_EQ(30u, orig_pixels[0][2]);
319   EXPECT_EQ(128u, orig_pixels[0][3]);
320   EXPECT_EQ(149u, orig_pixels[1][0]);
321   EXPECT_EQ(149u, orig_pixels[1][1]);
322   EXPECT_EQ(149u, orig_pixels[1][2]);
323   EXPECT_EQ(0u, orig_pixels[1][3]);
324   EXPECT_EQ(76u, orig_pixels[2][0]);
325   EXPECT_EQ(76u, orig_pixels[2][1]);
326   EXPECT_EQ(76u, orig_pixels[2][2]);
327   EXPECT_EQ(255u, orig_pixels[2][3]);
328   EXPECT_EQ(0u, orig_pixels[3][0]);
329   EXPECT_EQ(0u, orig_pixels[3][1]);
330   EXPECT_EQ(0u, orig_pixels[3][2]);
331   EXPECT_EQ(255u, orig_pixels[3][3]);
332   EXPECT_EQ(255u, orig_pixels[4][0]);
333   EXPECT_EQ(255u, orig_pixels[4][1]);
334   EXPECT_EQ(255u, orig_pixels[4][2]);
335   EXPECT_EQ(255u, orig_pixels[4][3]);
336   EXPECT_EQ(96u, orig_pixels[5][0]);
337   EXPECT_EQ(96u, orig_pixels[5][1]);
338   EXPECT_EQ(96u, orig_pixels[5][2]);
339   EXPECT_EQ(224u, orig_pixels[5][3]);
340   for (int i = 0; i < 1280; ++i) {
341     orig_pixels[i][0] = i;
342     orig_pixels[i][1] = i / 2;
343     orig_pixels[i][2] = i / 3;
344     orig_pixels[i][3] = i;
345   }
346   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
347     ARGBGray(&orig_pixels[0][0], 0, 0, 0, 1280, 1);
348   }
349 }
350 
TEST_F(LibYUVPlanarTest,TestARGBGrayTo)351 TEST_F(LibYUVPlanarTest, TestARGBGrayTo) {
352   SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
353   SIMD_ALIGNED(uint8_t gray_pixels[1280][4]);
354   memset(orig_pixels, 0, sizeof(orig_pixels));
355 
356   // Test blue
357   orig_pixels[0][0] = 255u;
358   orig_pixels[0][1] = 0u;
359   orig_pixels[0][2] = 0u;
360   orig_pixels[0][3] = 128u;
361   // Test green
362   orig_pixels[1][0] = 0u;
363   orig_pixels[1][1] = 255u;
364   orig_pixels[1][2] = 0u;
365   orig_pixels[1][3] = 0u;
366   // Test red
367   orig_pixels[2][0] = 0u;
368   orig_pixels[2][1] = 0u;
369   orig_pixels[2][2] = 255u;
370   orig_pixels[2][3] = 255u;
371   // Test black
372   orig_pixels[3][0] = 0u;
373   orig_pixels[3][1] = 0u;
374   orig_pixels[3][2] = 0u;
375   orig_pixels[3][3] = 255u;
376   // Test white
377   orig_pixels[4][0] = 255u;
378   orig_pixels[4][1] = 255u;
379   orig_pixels[4][2] = 255u;
380   orig_pixels[4][3] = 255u;
381   // Test color
382   orig_pixels[5][0] = 16u;
383   orig_pixels[5][1] = 64u;
384   orig_pixels[5][2] = 192u;
385   orig_pixels[5][3] = 224u;
386   // Do 16 to test asm version.
387   ARGBGrayTo(&orig_pixels[0][0], 0, &gray_pixels[0][0], 0, 16, 1);
388   EXPECT_EQ(30u, gray_pixels[0][0]);
389   EXPECT_EQ(30u, gray_pixels[0][1]);
390   EXPECT_EQ(30u, gray_pixels[0][2]);
391   EXPECT_EQ(128u, gray_pixels[0][3]);
392   EXPECT_EQ(149u, gray_pixels[1][0]);
393   EXPECT_EQ(149u, gray_pixels[1][1]);
394   EXPECT_EQ(149u, gray_pixels[1][2]);
395   EXPECT_EQ(0u, gray_pixels[1][3]);
396   EXPECT_EQ(76u, gray_pixels[2][0]);
397   EXPECT_EQ(76u, gray_pixels[2][1]);
398   EXPECT_EQ(76u, gray_pixels[2][2]);
399   EXPECT_EQ(255u, gray_pixels[2][3]);
400   EXPECT_EQ(0u, gray_pixels[3][0]);
401   EXPECT_EQ(0u, gray_pixels[3][1]);
402   EXPECT_EQ(0u, gray_pixels[3][2]);
403   EXPECT_EQ(255u, gray_pixels[3][3]);
404   EXPECT_EQ(255u, gray_pixels[4][0]);
405   EXPECT_EQ(255u, gray_pixels[4][1]);
406   EXPECT_EQ(255u, gray_pixels[4][2]);
407   EXPECT_EQ(255u, gray_pixels[4][3]);
408   EXPECT_EQ(96u, gray_pixels[5][0]);
409   EXPECT_EQ(96u, gray_pixels[5][1]);
410   EXPECT_EQ(96u, gray_pixels[5][2]);
411   EXPECT_EQ(224u, gray_pixels[5][3]);
412   for (int i = 0; i < 1280; ++i) {
413     orig_pixels[i][0] = i;
414     orig_pixels[i][1] = i / 2;
415     orig_pixels[i][2] = i / 3;
416     orig_pixels[i][3] = i;
417   }
418   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
419     ARGBGrayTo(&orig_pixels[0][0], 0, &gray_pixels[0][0], 0, 1280, 1);
420   }
421 }
422 
TEST_F(LibYUVPlanarTest,TestARGBSepia)423 TEST_F(LibYUVPlanarTest, TestARGBSepia) {
424   SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
425   memset(orig_pixels, 0, sizeof(orig_pixels));
426 
427   // Test blue
428   orig_pixels[0][0] = 255u;
429   orig_pixels[0][1] = 0u;
430   orig_pixels[0][2] = 0u;
431   orig_pixels[0][3] = 128u;
432   // Test green
433   orig_pixels[1][0] = 0u;
434   orig_pixels[1][1] = 255u;
435   orig_pixels[1][2] = 0u;
436   orig_pixels[1][3] = 0u;
437   // Test red
438   orig_pixels[2][0] = 0u;
439   orig_pixels[2][1] = 0u;
440   orig_pixels[2][2] = 255u;
441   orig_pixels[2][3] = 255u;
442   // Test black
443   orig_pixels[3][0] = 0u;
444   orig_pixels[3][1] = 0u;
445   orig_pixels[3][2] = 0u;
446   orig_pixels[3][3] = 255u;
447   // Test white
448   orig_pixels[4][0] = 255u;
449   orig_pixels[4][1] = 255u;
450   orig_pixels[4][2] = 255u;
451   orig_pixels[4][3] = 255u;
452   // Test color
453   orig_pixels[5][0] = 16u;
454   orig_pixels[5][1] = 64u;
455   orig_pixels[5][2] = 192u;
456   orig_pixels[5][3] = 224u;
457   // Do 16 to test asm version.
458   ARGBSepia(&orig_pixels[0][0], 0, 0, 0, 16, 1);
459   EXPECT_EQ(33u, orig_pixels[0][0]);
460   EXPECT_EQ(43u, orig_pixels[0][1]);
461   EXPECT_EQ(47u, orig_pixels[0][2]);
462   EXPECT_EQ(128u, orig_pixels[0][3]);
463   EXPECT_EQ(135u, orig_pixels[1][0]);
464   EXPECT_EQ(175u, orig_pixels[1][1]);
465   EXPECT_EQ(195u, orig_pixels[1][2]);
466   EXPECT_EQ(0u, orig_pixels[1][3]);
467   EXPECT_EQ(69u, orig_pixels[2][0]);
468   EXPECT_EQ(89u, orig_pixels[2][1]);
469   EXPECT_EQ(99u, orig_pixels[2][2]);
470   EXPECT_EQ(255u, orig_pixels[2][3]);
471   EXPECT_EQ(0u, orig_pixels[3][0]);
472   EXPECT_EQ(0u, orig_pixels[3][1]);
473   EXPECT_EQ(0u, orig_pixels[3][2]);
474   EXPECT_EQ(255u, orig_pixels[3][3]);
475   EXPECT_EQ(239u, orig_pixels[4][0]);
476   EXPECT_EQ(255u, orig_pixels[4][1]);
477   EXPECT_EQ(255u, orig_pixels[4][2]);
478   EXPECT_EQ(255u, orig_pixels[4][3]);
479   EXPECT_EQ(88u, orig_pixels[5][0]);
480   EXPECT_EQ(114u, orig_pixels[5][1]);
481   EXPECT_EQ(127u, orig_pixels[5][2]);
482   EXPECT_EQ(224u, orig_pixels[5][3]);
483 
484   for (int i = 0; i < 1280; ++i) {
485     orig_pixels[i][0] = i;
486     orig_pixels[i][1] = i / 2;
487     orig_pixels[i][2] = i / 3;
488     orig_pixels[i][3] = i;
489   }
490   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
491     ARGBSepia(&orig_pixels[0][0], 0, 0, 0, 1280, 1);
492   }
493 }
494 
TEST_F(LibYUVPlanarTest,TestARGBColorMatrix)495 TEST_F(LibYUVPlanarTest, TestARGBColorMatrix) {
496   SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
497   SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]);
498   SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]);
499 
500   // Matrix for Sepia.
501   SIMD_ALIGNED(static const int8_t kRGBToSepia[]) = {
502       17 / 2, 68 / 2, 35 / 2, 0, 22 / 2, 88 / 2, 45 / 2, 0,
503       24 / 2, 98 / 2, 50 / 2, 0, 0,      0,      0,      64,  // Copy alpha.
504   };
505   memset(orig_pixels, 0, sizeof(orig_pixels));
506 
507   // Test blue
508   orig_pixels[0][0] = 255u;
509   orig_pixels[0][1] = 0u;
510   orig_pixels[0][2] = 0u;
511   orig_pixels[0][3] = 128u;
512   // Test green
513   orig_pixels[1][0] = 0u;
514   orig_pixels[1][1] = 255u;
515   orig_pixels[1][2] = 0u;
516   orig_pixels[1][3] = 0u;
517   // Test red
518   orig_pixels[2][0] = 0u;
519   orig_pixels[2][1] = 0u;
520   orig_pixels[2][2] = 255u;
521   orig_pixels[2][3] = 255u;
522   // Test color
523   orig_pixels[3][0] = 16u;
524   orig_pixels[3][1] = 64u;
525   orig_pixels[3][2] = 192u;
526   orig_pixels[3][3] = 224u;
527   // Do 16 to test asm version.
528   ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
529                   &kRGBToSepia[0], 16, 1);
530   EXPECT_EQ(31u, dst_pixels_opt[0][0]);
531   EXPECT_EQ(43u, dst_pixels_opt[0][1]);
532   EXPECT_EQ(47u, dst_pixels_opt[0][2]);
533   EXPECT_EQ(128u, dst_pixels_opt[0][3]);
534   EXPECT_EQ(135u, dst_pixels_opt[1][0]);
535   EXPECT_EQ(175u, dst_pixels_opt[1][1]);
536   EXPECT_EQ(195u, dst_pixels_opt[1][2]);
537   EXPECT_EQ(0u, dst_pixels_opt[1][3]);
538   EXPECT_EQ(67u, dst_pixels_opt[2][0]);
539   EXPECT_EQ(87u, dst_pixels_opt[2][1]);
540   EXPECT_EQ(99u, dst_pixels_opt[2][2]);
541   EXPECT_EQ(255u, dst_pixels_opt[2][3]);
542   EXPECT_EQ(87u, dst_pixels_opt[3][0]);
543   EXPECT_EQ(112u, dst_pixels_opt[3][1]);
544   EXPECT_EQ(127u, dst_pixels_opt[3][2]);
545   EXPECT_EQ(224u, dst_pixels_opt[3][3]);
546 
547   for (int i = 0; i < 1280; ++i) {
548     orig_pixels[i][0] = i;
549     orig_pixels[i][1] = i / 2;
550     orig_pixels[i][2] = i / 3;
551     orig_pixels[i][3] = i;
552   }
553   MaskCpuFlags(disable_cpu_flags_);
554   ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0,
555                   &kRGBToSepia[0], 1280, 1);
556   MaskCpuFlags(benchmark_cpu_info_);
557 
558   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
559     ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
560                     &kRGBToSepia[0], 1280, 1);
561   }
562 
563   for (int i = 0; i < 1280; ++i) {
564     EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]);
565     EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]);
566     EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]);
567     EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]);
568   }
569 }
570 
// Applies a sepia matrix in place with RGBColorMatrix to four known pixels
// and checks every output byte, then runs RGBColorMatrix over a 1280 pixel
// ramp as a benchmark.
TEST_F(LibYUVPlanarTest, TestRGBColorMatrix) {
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);

  // Matrix for Sepia.
  SIMD_ALIGNED(static const int8_t kRGBToSepia[]) = {
      17, 68, 35, 0, 22, 88, 45, 0,
      24, 98, 50, 0, 0,  0,  0,  0,  // Unused but makes matrix 16 bytes.
  };
  memset(orig_pixels, 0, sizeof(orig_pixels));

  const int kTestPixels = 4;
  static const uint8_t kSource[kTestPixels][4] = {
      {255u, 0u, 0u, 128u},    // Test blue
      {0u, 255u, 0u, 0u},      // Test green
      {0u, 0u, 255u, 255u},    // Test red
      {16u, 64u, 192u, 224u},  // Test color
  };
  static const uint32_t kSepia[kTestPixels][4] = {
      {31u, 43u, 47u, 128u},
      {135u, 175u, 195u, 0u},
      {67u, 87u, 99u, 255u},
      {87u, 112u, 127u, 224u},
  };
  for (int p = 0; p < kTestPixels; ++p) {
    for (int ch = 0; ch < 4; ++ch) {
      orig_pixels[p][ch] = kSource[p][ch];
    }
  }
  // Do 16 to test asm version.
  RGBColorMatrix(&orig_pixels[0][0], 0, &kRGBToSepia[0], 0, 0, 16, 1);
  for (int p = 0; p < kTestPixels; ++p) {
    for (int ch = 0; ch < 4; ++ch) {
      EXPECT_EQ(kSepia[p][ch], orig_pixels[p][ch]);
    }
  }

  // Ramp pattern for the benchmark pass; values wrap when stored into a byte.
  for (int i = 0; i < 1280; ++i) {
    uint8_t* px = orig_pixels[i];
    px[0] = i;
    px[1] = i / 2;
    px[2] = i / 3;
    px[3] = i;
  }
  for (int pass = 0; pass < benchmark_pixels_div1280_; ++pass) {
    RGBColorMatrix(&orig_pixels[0][0], 0, &kRGBToSepia[0], 0, 0, 1280, 1);
  }
}
630 
// Remaps four known pixels in place through a lookup table with
// ARGBColorTable and checks all four bytes of each, then runs ARGBColorTable
// over a 1280 pixel ramp as a benchmark.
TEST_F(LibYUVPlanarTest, TestARGBColorTable) {
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  // Lookup table; only the first 16 bytes are non-zero.
  static const uint8_t kARGBTable[256 * 4] = {
      1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u, 16u,
  };

  const int kTestPixels = 4;
  static const uint8_t kSource[kTestPixels][4] = {
      {0u, 0u, 0u, 0u},
      {1u, 1u, 1u, 1u},
      {2u, 2u, 2u, 2u},
      {0u, 1u, 2u, 3u},
  };
  static const uint32_t kMapped[kTestPixels][4] = {
      {1u, 2u, 3u, 4u},
      {5u, 6u, 7u, 8u},
      {9u, 10u, 11u, 12u},
      {1u, 6u, 11u, 16u},
  };
  for (int p = 0; p < kTestPixels; ++p) {
    for (int ch = 0; ch < 4; ++ch) {
      orig_pixels[p][ch] = kSource[p][ch];
    }
  }
  // Do 16 to test asm version.
  ARGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 16, 1);
  for (int p = 0; p < kTestPixels; ++p) {
    for (int ch = 0; ch < 4; ++ch) {
      EXPECT_EQ(kMapped[p][ch], orig_pixels[p][ch]);
    }
  }

  // Ramp pattern for the benchmark pass; values wrap when stored into a byte.
  for (int i = 0; i < 1280; ++i) {
    uint8_t* px = orig_pixels[i];
    px[0] = i;
    px[1] = i / 2;
    px[2] = i / 3;
    px[3] = i;
  }
  for (int pass = 0; pass < benchmark_pixels_div1280_; ++pass) {
    ARGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 1280, 1);
  }
}
685 
686 // Same as TestARGBColorTable except alpha does not change.
TEST_F(LibYUVPlanarTest,TestRGBColorTable)687 TEST_F(LibYUVPlanarTest, TestRGBColorTable) {
688   SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
689   memset(orig_pixels, 0, sizeof(orig_pixels));
690 
691   // Matrix for Sepia.
692   static const uint8_t kARGBTable[256 * 4] = {
693       1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u, 16u,
694   };
695 
696   orig_pixels[0][0] = 0u;
697   orig_pixels[0][1] = 0u;
698   orig_pixels[0][2] = 0u;
699   orig_pixels[0][3] = 0u;
700   orig_pixels[1][0] = 1u;
701   orig_pixels[1][1] = 1u;
702   orig_pixels[1][2] = 1u;
703   orig_pixels[1][3] = 1u;
704   orig_pixels[2][0] = 2u;
705   orig_pixels[2][1] = 2u;
706   orig_pixels[2][2] = 2u;
707   orig_pixels[2][3] = 2u;
708   orig_pixels[3][0] = 0u;
709   orig_pixels[3][1] = 1u;
710   orig_pixels[3][2] = 2u;
711   orig_pixels[3][3] = 3u;
712   // Do 16 to test asm version.
713   RGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 16, 1);
714   EXPECT_EQ(1u, orig_pixels[0][0]);
715   EXPECT_EQ(2u, orig_pixels[0][1]);
716   EXPECT_EQ(3u, orig_pixels[0][2]);
717   EXPECT_EQ(0u, orig_pixels[0][3]);  // Alpha unchanged.
718   EXPECT_EQ(5u, orig_pixels[1][0]);
719   EXPECT_EQ(6u, orig_pixels[1][1]);
720   EXPECT_EQ(7u, orig_pixels[1][2]);
721   EXPECT_EQ(1u, orig_pixels[1][3]);  // Alpha unchanged.
722   EXPECT_EQ(9u, orig_pixels[2][0]);
723   EXPECT_EQ(10u, orig_pixels[2][1]);
724   EXPECT_EQ(11u, orig_pixels[2][2]);
725   EXPECT_EQ(2u, orig_pixels[2][3]);  // Alpha unchanged.
726   EXPECT_EQ(1u, orig_pixels[3][0]);
727   EXPECT_EQ(6u, orig_pixels[3][1]);
728   EXPECT_EQ(11u, orig_pixels[3][2]);
729   EXPECT_EQ(3u, orig_pixels[3][3]);  // Alpha unchanged.
730 
731   for (int i = 0; i < 1280; ++i) {
732     orig_pixels[i][0] = i;
733     orig_pixels[i][1] = i / 2;
734     orig_pixels[i][2] = i / 3;
735     orig_pixels[i][3] = i;
736   }
737   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
738     RGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 1280, 1);
739   }
740 }
741 
// Quantizes a 1280 pixel ramp in place with ARGBQuantize and checks that the
// first three bytes of each pixel land on multiples of 8 plus 4 while the
// fourth byte keeps its value; then repeats the call as a benchmark.
TEST_F(LibYUVPlanarTest, TestARGBQuantize) {
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);

  const int kStep = 8;
  const int kHalfStep = kStep / 2;
  const int kScale = (65536 + kHalfStep) / kStep;

  // Ramp pattern; values wrap when stored into a byte.
  for (int i = 0; i < 1280; ++i) {
    uint8_t* px = orig_pixels[i];
    px[0] = i;
    px[1] = i / 2;
    px[2] = i / 3;
    px[3] = i;
  }
  ARGBQuantize(&orig_pixels[0][0], 0, kScale, kStep, kHalfStep, 0, 0, 1280, 1);

  for (int i = 0; i < 1280; ++i) {
    EXPECT_EQ((i / kStep * kStep + kHalfStep) & 255, orig_pixels[i][0]);
    EXPECT_EQ((i / 2 / kStep * kStep + kHalfStep) & 255, orig_pixels[i][1]);
    EXPECT_EQ((i / 3 / kStep * kStep + kHalfStep) & 255, orig_pixels[i][2]);
    EXPECT_EQ(i & 255, orig_pixels[i][3]);
  }
  for (int pass = 0; pass < benchmark_pixels_div1280_; ++pass) {
    ARGBQuantize(&orig_pixels[0][0], 0, kScale, kStep, kHalfStep, 0, 0, 1280,
                 1);
  }
}
765 
// Mirrors a 1280 pixel ramp with ARGBMirror and checks that source pixel i
// appears at destination index 1280 - 1 - i; then repeats the call as a
// benchmark.
TEST_F(LibYUVPlanarTest, TestARGBMirror) {
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  SIMD_ALIGNED(uint8_t dst_pixels[1280][4]);

  const int kWidth = 1280;

  // Ramp pattern; values wrap when stored into a byte.
  for (int i = 0; i < kWidth; ++i) {
    uint8_t* px = orig_pixels[i];
    px[0] = i;
    px[1] = i / 2;
    px[2] = i / 3;
    px[3] = i / 4;
  }
  ARGBMirror(&orig_pixels[0][0], 0, &dst_pixels[0][0], 0, kWidth, 1);

  for (int i = 0; i < kWidth; ++i) {
    const int mirrored = kWidth - 1 - i;
    EXPECT_EQ(i & 255, dst_pixels[mirrored][0]);
    EXPECT_EQ((i / 2) & 255, dst_pixels[mirrored][1]);
    EXPECT_EQ((i / 3) & 255, dst_pixels[mirrored][2]);
    EXPECT_EQ((i / 4) & 255, dst_pixels[mirrored][3]);
  }
  for (int pass = 0; pass < benchmark_pixels_div1280_; ++pass) {
    ARGBMirror(&orig_pixels[0][0], 0, &dst_pixels[0][0], 0, kWidth, 1);
  }
}
788 
// Runs ARGBShade with three different shade values over four known pixels and
// checks the resulting bytes, then runs ARGBShade over 1280 pixels as a
// benchmark.
TEST_F(LibYUVPlanarTest, TestShade) {
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  SIMD_ALIGNED(uint8_t shade_pixels[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  const int kTestPixels = 4;
  static const uint8_t kSource[kTestPixels][4] = {
      {10u, 20u, 40u, 80u},
      {0u, 0u, 0u, 255u},
      {0u, 0u, 0u, 0u},
      {0u, 0u, 0u, 0u},
  };
  for (int p = 0; p < kTestPixels; ++p) {
    for (int ch = 0; ch < 4; ++ch) {
      orig_pixels[p][ch] = kSource[p][ch];
    }
  }

  // Shade value 0x80ffffff: all four pixels are checked.
  static const uint32_t kShadedAlphaOnly[kTestPixels][4] = {
      {10u, 20u, 40u, 40u},
      {0u, 0u, 0u, 128u},
      {0u, 0u, 0u, 0u},
      {0u, 0u, 0u, 0u},
  };
  // Do 8 pixels to allow opt version to be used.
  ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 8, 1, 0x80ffffff);
  for (int p = 0; p < kTestPixels; ++p) {
    for (int ch = 0; ch < 4; ++ch) {
      EXPECT_EQ(kShadedAlphaOnly[p][ch], shade_pixels[p][ch]);
    }
  }

  // Shade value 0x80808080: only the first pixel is checked.
  static const uint32_t kShadedHalf[4] = {5u, 10u, 20u, 40u};
  ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 8, 1, 0x80808080);
  for (int ch = 0; ch < 4; ++ch) {
    EXPECT_EQ(kShadedHalf[ch], shade_pixels[0][ch]);
  }

  // Shade value 0x10204080: every byte of the first pixel is expected to be 5.
  ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 8, 1, 0x10204080);
  for (int ch = 0; ch < 4; ++ch) {
    EXPECT_EQ(5u, shade_pixels[0][ch]);
  }

  for (int pass = 0; pass < benchmark_pixels_div1280_; ++pass) {
    ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 1280, 1,
              0x80808080);
  }
}
846 
TEST_F(LibYUVPlanarTest,TestARGBInterpolate)847 TEST_F(LibYUVPlanarTest, TestARGBInterpolate) {
848   SIMD_ALIGNED(uint8_t orig_pixels_0[1280][4]);
849   SIMD_ALIGNED(uint8_t orig_pixels_1[1280][4]);
850   SIMD_ALIGNED(uint8_t interpolate_pixels[1280][4]);
851   memset(orig_pixels_0, 0, sizeof(orig_pixels_0));
852   memset(orig_pixels_1, 0, sizeof(orig_pixels_1));
853 
854   orig_pixels_0[0][0] = 16u;
855   orig_pixels_0[0][1] = 32u;
856   orig_pixels_0[0][2] = 64u;
857   orig_pixels_0[0][3] = 128u;
858   orig_pixels_0[1][0] = 0u;
859   orig_pixels_0[1][1] = 0u;
860   orig_pixels_0[1][2] = 0u;
861   orig_pixels_0[1][3] = 255u;
862   orig_pixels_0[2][0] = 0u;
863   orig_pixels_0[2][1] = 0u;
864   orig_pixels_0[2][2] = 0u;
865   orig_pixels_0[2][3] = 0u;
866   orig_pixels_0[3][0] = 0u;
867   orig_pixels_0[3][1] = 0u;
868   orig_pixels_0[3][2] = 0u;
869   orig_pixels_0[3][3] = 0u;
870 
871   orig_pixels_1[0][0] = 0u;
872   orig_pixels_1[0][1] = 0u;
873   orig_pixels_1[0][2] = 0u;
874   orig_pixels_1[0][3] = 0u;
875   orig_pixels_1[1][0] = 0u;
876   orig_pixels_1[1][1] = 0u;
877   orig_pixels_1[1][2] = 0u;
878   orig_pixels_1[1][3] = 0u;
879   orig_pixels_1[2][0] = 0u;
880   orig_pixels_1[2][1] = 0u;
881   orig_pixels_1[2][2] = 0u;
882   orig_pixels_1[2][3] = 0u;
883   orig_pixels_1[3][0] = 255u;
884   orig_pixels_1[3][1] = 255u;
885   orig_pixels_1[3][2] = 255u;
886   orig_pixels_1[3][3] = 255u;
887 
888   ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
889                   &interpolate_pixels[0][0], 0, 4, 1, 128);
890   EXPECT_EQ(8u, interpolate_pixels[0][0]);
891   EXPECT_EQ(16u, interpolate_pixels[0][1]);
892   EXPECT_EQ(32u, interpolate_pixels[0][2]);
893   EXPECT_EQ(64u, interpolate_pixels[0][3]);
894   EXPECT_EQ(0u, interpolate_pixels[1][0]);
895   EXPECT_EQ(0u, interpolate_pixels[1][1]);
896   EXPECT_EQ(0u, interpolate_pixels[1][2]);
897   EXPECT_EQ(128u, interpolate_pixels[1][3]);
898   EXPECT_EQ(0u, interpolate_pixels[2][0]);
899   EXPECT_EQ(0u, interpolate_pixels[2][1]);
900   EXPECT_EQ(0u, interpolate_pixels[2][2]);
901   EXPECT_EQ(0u, interpolate_pixels[2][3]);
902   EXPECT_EQ(128u, interpolate_pixels[3][0]);
903   EXPECT_EQ(128u, interpolate_pixels[3][1]);
904   EXPECT_EQ(128u, interpolate_pixels[3][2]);
905   EXPECT_EQ(128u, interpolate_pixels[3][3]);
906 
907   ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
908                   &interpolate_pixels[0][0], 0, 4, 1, 0);
909   EXPECT_EQ(16u, interpolate_pixels[0][0]);
910   EXPECT_EQ(32u, interpolate_pixels[0][1]);
911   EXPECT_EQ(64u, interpolate_pixels[0][2]);
912   EXPECT_EQ(128u, interpolate_pixels[0][3]);
913 
914   ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
915                   &interpolate_pixels[0][0], 0, 4, 1, 192);
916 
917   EXPECT_EQ(4u, interpolate_pixels[0][0]);
918   EXPECT_EQ(8u, interpolate_pixels[0][1]);
919   EXPECT_EQ(16u, interpolate_pixels[0][2]);
920   EXPECT_EQ(32u, interpolate_pixels[0][3]);
921 
922   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
923     ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
924                     &interpolate_pixels[0][0], 0, 1280, 1, 128);
925   }
926 }
927 
TEST_F(LibYUVPlanarTest,TestInterpolatePlane)928 TEST_F(LibYUVPlanarTest, TestInterpolatePlane) {
929   SIMD_ALIGNED(uint8_t orig_pixels_0[1280]);
930   SIMD_ALIGNED(uint8_t orig_pixels_1[1280]);
931   SIMD_ALIGNED(uint8_t interpolate_pixels[1280]);
932   memset(orig_pixels_0, 0, sizeof(orig_pixels_0));
933   memset(orig_pixels_1, 0, sizeof(orig_pixels_1));
934 
935   orig_pixels_0[0] = 16u;
936   orig_pixels_0[1] = 32u;
937   orig_pixels_0[2] = 64u;
938   orig_pixels_0[3] = 128u;
939   orig_pixels_0[4] = 0u;
940   orig_pixels_0[5] = 0u;
941   orig_pixels_0[6] = 0u;
942   orig_pixels_0[7] = 255u;
943   orig_pixels_0[8] = 0u;
944   orig_pixels_0[9] = 0u;
945   orig_pixels_0[10] = 0u;
946   orig_pixels_0[11] = 0u;
947   orig_pixels_0[12] = 0u;
948   orig_pixels_0[13] = 0u;
949   orig_pixels_0[14] = 0u;
950   orig_pixels_0[15] = 0u;
951 
952   orig_pixels_1[0] = 0u;
953   orig_pixels_1[1] = 0u;
954   orig_pixels_1[2] = 0u;
955   orig_pixels_1[3] = 0u;
956   orig_pixels_1[4] = 0u;
957   orig_pixels_1[5] = 0u;
958   orig_pixels_1[6] = 0u;
959   orig_pixels_1[7] = 0u;
960   orig_pixels_1[8] = 0u;
961   orig_pixels_1[9] = 0u;
962   orig_pixels_1[10] = 0u;
963   orig_pixels_1[11] = 0u;
964   orig_pixels_1[12] = 255u;
965   orig_pixels_1[13] = 255u;
966   orig_pixels_1[14] = 255u;
967   orig_pixels_1[15] = 255u;
968 
969   InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
970                    &interpolate_pixels[0], 0, 16, 1, 128);
971   EXPECT_EQ(8u, interpolate_pixels[0]);
972   EXPECT_EQ(16u, interpolate_pixels[1]);
973   EXPECT_EQ(32u, interpolate_pixels[2]);
974   EXPECT_EQ(64u, interpolate_pixels[3]);
975   EXPECT_EQ(0u, interpolate_pixels[4]);
976   EXPECT_EQ(0u, interpolate_pixels[5]);
977   EXPECT_EQ(0u, interpolate_pixels[6]);
978   EXPECT_EQ(128u, interpolate_pixels[7]);
979   EXPECT_EQ(0u, interpolate_pixels[8]);
980   EXPECT_EQ(0u, interpolate_pixels[9]);
981   EXPECT_EQ(0u, interpolate_pixels[10]);
982   EXPECT_EQ(0u, interpolate_pixels[11]);
983   EXPECT_EQ(128u, interpolate_pixels[12]);
984   EXPECT_EQ(128u, interpolate_pixels[13]);
985   EXPECT_EQ(128u, interpolate_pixels[14]);
986   EXPECT_EQ(128u, interpolate_pixels[15]);
987 
988   InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
989                    &interpolate_pixels[0], 0, 16, 1, 0);
990   EXPECT_EQ(16u, interpolate_pixels[0]);
991   EXPECT_EQ(32u, interpolate_pixels[1]);
992   EXPECT_EQ(64u, interpolate_pixels[2]);
993   EXPECT_EQ(128u, interpolate_pixels[3]);
994 
995   InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
996                    &interpolate_pixels[0], 0, 16, 1, 192);
997 
998   EXPECT_EQ(4u, interpolate_pixels[0]);
999   EXPECT_EQ(8u, interpolate_pixels[1]);
1000   EXPECT_EQ(16u, interpolate_pixels[2]);
1001   EXPECT_EQ(32u, interpolate_pixels[3]);
1002 
1003   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
1004     InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
1005                      &interpolate_pixels[0], 0, 1280, 1, 123);
1006   }
1007 }
1008 
// Defines a test named ARGBInterpolate<TERP><N> that runs ARGBInterpolate once
// with disable_cpu_flags_ applied and benchmark_iterations_ times with
// benchmark_cpu_info_ applied, then expects both outputs to match byte for
// byte.
//   FMT_A, FMT_B       - not referenced by the macro body.
//   BPP_A, BPP_B       - bytes per pixel of the sources / destination.
//   STRIDE_A, STRIDE_B - row sizes are rounded up to a multiple of these.
//   W1280              - width in pixels; values below 1 are clamped to 1.
//   TERP               - interpolation value; also pasted into the test name,
//                        so it must be a single token.
//   N                  - suffix pasted into the test name.
//   NEG                - sign token (+ or -) applied to the height.
//   OFF                - extra byte offset applied to both source buffers.
// Value parameters are parenthesized wherever they appear in an expression so
// that expression arguments expand with the intended precedence.
#define TESTTERP(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, W1280, TERP, \
                 N, NEG, OFF)                                                 \
  TEST_F(LibYUVPlanarTest, ARGBInterpolate##TERP##N) {                        \
    const int kWidth = ((W1280) > 0) ? (W1280) : 1;                           \
    const int kHeight = benchmark_height_;                                    \
    const int kStrideA =                                                      \
        (kWidth * (BPP_A) + (STRIDE_A)-1) / (STRIDE_A) * (STRIDE_A);          \
    const int kStrideB =                                                      \
        (kWidth * (BPP_B) + (STRIDE_B)-1) / (STRIDE_B) * (STRIDE_B);          \
    align_buffer_page_end(src_argb_a, kStrideA* kHeight + (OFF));             \
    align_buffer_page_end(src_argb_b, kStrideA* kHeight + (OFF));             \
    align_buffer_page_end(dst_argb_c, kStrideB* kHeight);                     \
    align_buffer_page_end(dst_argb_opt, kStrideB* kHeight);                   \
    for (int i = 0; i < kStrideA * kHeight; ++i) {                            \
      src_argb_a[i + (OFF)] = (fastrand() & 0xff);                            \
      src_argb_b[i + (OFF)] = (fastrand() & 0xff);                            \
    }                                                                         \
    MaskCpuFlags(disable_cpu_flags_);                                         \
    ARGBInterpolate(src_argb_a + (OFF), kStrideA, src_argb_b + (OFF),         \
                    kStrideA, dst_argb_c, kStrideB, kWidth, NEG kHeight,      \
                    (TERP));                                                  \
    MaskCpuFlags(benchmark_cpu_info_);                                        \
    for (int i = 0; i < benchmark_iterations_; ++i) {                         \
      ARGBInterpolate(src_argb_a + (OFF), kStrideA, src_argb_b + (OFF),       \
                      kStrideA, dst_argb_opt, kStrideB, kWidth, NEG kHeight,  \
                      (TERP));                                                \
    }                                                                         \
    for (int i = 0; i < kStrideB * kHeight; ++i) {                            \
      EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]);                              \
    }                                                                         \
    free_aligned_buffer_page_end(src_argb_a);                                 \
    free_aligned_buffer_page_end(src_argb_b);                                 \
    free_aligned_buffer_page_end(dst_argb_c);                                 \
    free_aligned_buffer_page_end(dst_argb_opt);                               \
  }
1042 
1043 #define TESTINTERPOLATE(TERP)                                                \
1044   TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_ - 1, TERP, _Any, +, 0)   \
1045   TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Unaligned, +, 1) \
1046   TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Invert, -, 0)    \
1047   TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Opt, +, 0)
1048 
1049 TESTINTERPOLATE(0)
1050 TESTINTERPOLATE(64)
1051 TESTINTERPOLATE(128)
1052 TESTINTERPOLATE(192)
1053 TESTINTERPOLATE(255)
1054 
TestBlend(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1055 static int TestBlend(int width,
1056                      int height,
1057                      int benchmark_iterations,
1058                      int disable_cpu_flags,
1059                      int benchmark_cpu_info,
1060                      int invert,
1061                      int off) {
1062   if (width < 1) {
1063     width = 1;
1064   }
1065   const int kBpp = 4;
1066   const int kStride = width * kBpp;
1067   align_buffer_page_end(src_argb_a, kStride * height + off);
1068   align_buffer_page_end(src_argb_b, kStride * height + off);
1069   align_buffer_page_end(dst_argb_c, kStride * height);
1070   align_buffer_page_end(dst_argb_opt, kStride * height);
1071   for (int i = 0; i < kStride * height; ++i) {
1072     src_argb_a[i + off] = (fastrand() & 0xff);
1073     src_argb_b[i + off] = (fastrand() & 0xff);
1074   }
1075   ARGBAttenuate(src_argb_a + off, kStride, src_argb_a + off, kStride, width,
1076                 height);
1077   ARGBAttenuate(src_argb_b + off, kStride, src_argb_b + off, kStride, width,
1078                 height);
1079   memset(dst_argb_c, 255, kStride * height);
1080   memset(dst_argb_opt, 255, kStride * height);
1081 
1082   MaskCpuFlags(disable_cpu_flags);
1083   ARGBBlend(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
1084             kStride, width, invert * height);
1085   MaskCpuFlags(benchmark_cpu_info);
1086   for (int i = 0; i < benchmark_iterations; ++i) {
1087     ARGBBlend(src_argb_a + off, kStride, src_argb_b + off, kStride,
1088               dst_argb_opt, kStride, width, invert * height);
1089   }
1090   int max_diff = 0;
1091   for (int i = 0; i < kStride * height; ++i) {
1092     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1093                        static_cast<int>(dst_argb_opt[i]));
1094     if (abs_diff > max_diff) {
1095       max_diff = abs_diff;
1096     }
1097   }
1098   free_aligned_buffer_page_end(src_argb_a);
1099   free_aligned_buffer_page_end(src_argb_b);
1100   free_aligned_buffer_page_end(dst_argb_c);
1101   free_aligned_buffer_page_end(dst_argb_opt);
1102   return max_diff;
1103 }
1104 
// ARGBBlend with a width 4 pixels narrower than the benchmark width; the two
// outputs may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBBlend_Any) {
  const int kWidth = benchmark_width_ - 4;
  const int max_diff =
      TestBlend(kWidth, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, /*invert=*/+1,
                /*off=*/0);
  EXPECT_LE(max_diff, 1);
}
1111 
// ARGBBlend with both source buffers offset by one byte; the two outputs may
// differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBBlend_Unaligned) {
  const int max_diff =
      TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, /*invert=*/+1,
                /*off=*/1);
  EXPECT_LE(max_diff, 1);
}
1118 
// ARGBBlend with a negated height; the two outputs may differ by at most 1
// per byte.
TEST_F(LibYUVPlanarTest, ARGBBlend_Invert) {
  const int max_diff =
      TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, /*invert=*/-1,
                /*off=*/0);
  EXPECT_LE(max_diff, 1);
}
1125 
// ARGBBlend at the benchmark size with no offset and positive height; the two
// outputs may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBBlend_Opt) {
  const int max_diff =
      TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, /*invert=*/+1,
                /*off=*/0);
  EXPECT_LE(max_diff, 1);
}
1132 
TestBlendPlane(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1133 static void TestBlendPlane(int width,
1134                            int height,
1135                            int benchmark_iterations,
1136                            int disable_cpu_flags,
1137                            int benchmark_cpu_info,
1138                            int invert,
1139                            int off) {
1140   if (width < 1) {
1141     width = 1;
1142   }
1143   const int kBpp = 1;
1144   const int kStride = width * kBpp;
1145   align_buffer_page_end(src_argb_a, kStride * height + off);
1146   align_buffer_page_end(src_argb_b, kStride * height + off);
1147   align_buffer_page_end(src_argb_alpha, kStride * height + off);
1148   align_buffer_page_end(dst_argb_c, kStride * height + off);
1149   align_buffer_page_end(dst_argb_opt, kStride * height + off);
1150   memset(dst_argb_c, 255, kStride * height + off);
1151   memset(dst_argb_opt, 255, kStride * height + off);
1152 
1153   // Test source is maintained exactly if alpha is 255.
1154   for (int i = 0; i < width; ++i) {
1155     src_argb_a[i + off] = i & 255;
1156     src_argb_b[i + off] = 255 - (i & 255);
1157   }
1158   memset(src_argb_alpha + off, 255, width);
1159   BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
1160              src_argb_alpha + off, width, dst_argb_opt + off, width, width, 1);
1161   for (int i = 0; i < width; ++i) {
1162     EXPECT_EQ(src_argb_a[i + off], dst_argb_opt[i + off]);
1163   }
1164   // Test destination is maintained exactly if alpha is 0.
1165   memset(src_argb_alpha + off, 0, width);
1166   BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
1167              src_argb_alpha + off, width, dst_argb_opt + off, width, width, 1);
1168   for (int i = 0; i < width; ++i) {
1169     EXPECT_EQ(src_argb_b[i + off], dst_argb_opt[i + off]);
1170   }
1171   for (int i = 0; i < kStride * height; ++i) {
1172     src_argb_a[i + off] = (fastrand() & 0xff);
1173     src_argb_b[i + off] = (fastrand() & 0xff);
1174     src_argb_alpha[i + off] = (fastrand() & 0xff);
1175   }
1176 
1177   MaskCpuFlags(disable_cpu_flags);
1178   BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
1179              src_argb_alpha + off, width, dst_argb_c + off, width, width,
1180              invert * height);
1181   MaskCpuFlags(benchmark_cpu_info);
1182   for (int i = 0; i < benchmark_iterations; ++i) {
1183     BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
1184                src_argb_alpha + off, width, dst_argb_opt + off, width, width,
1185                invert * height);
1186   }
1187   for (int i = 0; i < kStride * height; ++i) {
1188     EXPECT_EQ(dst_argb_c[i + off], dst_argb_opt[i + off]);
1189   }
1190   free_aligned_buffer_page_end(src_argb_a);
1191   free_aligned_buffer_page_end(src_argb_b);
1192   free_aligned_buffer_page_end(src_argb_alpha);
1193   free_aligned_buffer_page_end(dst_argb_c);
1194   free_aligned_buffer_page_end(dst_argb_opt);
1195 }
1196 
// BlendPlane at the benchmark size with no buffer offset and positive height.
TEST_F(LibYUVPlanarTest, BlendPlane_Opt) {
  const int kInvert = +1;
  const int kOff = 0;
  TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
}
// BlendPlane at the benchmark size with every buffer offset by one byte.
TEST_F(LibYUVPlanarTest, BlendPlane_Unaligned) {
  const int kInvert = +1;
  const int kOff = 1;
  TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
}
// BlendPlane with a width 4 bytes narrower than the benchmark width and every
// buffer offset by one byte.
TEST_F(LibYUVPlanarTest, BlendPlane_Any) {
  const int kWidth = benchmark_width_ - 4;
  const int kInvert = +1;
  const int kOff = 1;
  TestBlendPlane(kWidth, benchmark_height_, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
}
// BlendPlane with a negated height and every buffer offset by one byte.
TEST_F(LibYUVPlanarTest, BlendPlane_Invert) {
  const int kInvert = -1;
  const int kOff = 1;
  TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
}
1213 
// Divides v by a, rounding up (for non-negative v and positive a).
#define SUBSAMPLE(v, a) (((v) + (a) - 1) / (a))
1215 
TestI420Blend(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1216 static void TestI420Blend(int width,
1217                           int height,
1218                           int benchmark_iterations,
1219                           int disable_cpu_flags,
1220                           int benchmark_cpu_info,
1221                           int invert,
1222                           int off) {
1223   width = ((width) > 0) ? (width) : 1;
1224   const int kStrideUV = SUBSAMPLE(width, 2);
1225   const int kSizeUV = kStrideUV * SUBSAMPLE(height, 2);
1226   align_buffer_page_end(src_y0, width * height + off);
1227   align_buffer_page_end(src_u0, kSizeUV + off);
1228   align_buffer_page_end(src_v0, kSizeUV + off);
1229   align_buffer_page_end(src_y1, width * height + off);
1230   align_buffer_page_end(src_u1, kSizeUV + off);
1231   align_buffer_page_end(src_v1, kSizeUV + off);
1232   align_buffer_page_end(src_a, width * height + off);
1233   align_buffer_page_end(dst_y_c, width * height + off);
1234   align_buffer_page_end(dst_u_c, kSizeUV + off);
1235   align_buffer_page_end(dst_v_c, kSizeUV + off);
1236   align_buffer_page_end(dst_y_opt, width * height + off);
1237   align_buffer_page_end(dst_u_opt, kSizeUV + off);
1238   align_buffer_page_end(dst_v_opt, kSizeUV + off);
1239 
1240   MemRandomize(src_y0, width * height + off);
1241   MemRandomize(src_u0, kSizeUV + off);
1242   MemRandomize(src_v0, kSizeUV + off);
1243   MemRandomize(src_y1, width * height + off);
1244   MemRandomize(src_u1, kSizeUV + off);
1245   MemRandomize(src_v1, kSizeUV + off);
1246   MemRandomize(src_a, width * height + off);
1247   memset(dst_y_c, 255, width * height + off);
1248   memset(dst_u_c, 255, kSizeUV + off);
1249   memset(dst_v_c, 255, kSizeUV + off);
1250   memset(dst_y_opt, 255, width * height + off);
1251   memset(dst_u_opt, 255, kSizeUV + off);
1252   memset(dst_v_opt, 255, kSizeUV + off);
1253 
1254   MaskCpuFlags(disable_cpu_flags);
1255   I420Blend(src_y0 + off, width, src_u0 + off, kStrideUV, src_v0 + off,
1256             kStrideUV, src_y1 + off, width, src_u1 + off, kStrideUV,
1257             src_v1 + off, kStrideUV, src_a + off, width, dst_y_c + off, width,
1258             dst_u_c + off, kStrideUV, dst_v_c + off, kStrideUV, width,
1259             invert * height);
1260   MaskCpuFlags(benchmark_cpu_info);
1261   for (int i = 0; i < benchmark_iterations; ++i) {
1262     I420Blend(src_y0 + off, width, src_u0 + off, kStrideUV, src_v0 + off,
1263               kStrideUV, src_y1 + off, width, src_u1 + off, kStrideUV,
1264               src_v1 + off, kStrideUV, src_a + off, width, dst_y_opt + off,
1265               width, dst_u_opt + off, kStrideUV, dst_v_opt + off, kStrideUV,
1266               width, invert * height);
1267   }
1268   for (int i = 0; i < width * height; ++i) {
1269     EXPECT_EQ(dst_y_c[i + off], dst_y_opt[i + off]);
1270   }
1271   for (int i = 0; i < kSizeUV; ++i) {
1272     EXPECT_EQ(dst_u_c[i + off], dst_u_opt[i + off]);
1273     EXPECT_EQ(dst_v_c[i + off], dst_v_opt[i + off]);
1274   }
1275   free_aligned_buffer_page_end(src_y0);
1276   free_aligned_buffer_page_end(src_u0);
1277   free_aligned_buffer_page_end(src_v0);
1278   free_aligned_buffer_page_end(src_y1);
1279   free_aligned_buffer_page_end(src_u1);
1280   free_aligned_buffer_page_end(src_v1);
1281   free_aligned_buffer_page_end(src_a);
1282   free_aligned_buffer_page_end(dst_y_c);
1283   free_aligned_buffer_page_end(dst_u_c);
1284   free_aligned_buffer_page_end(dst_v_c);
1285   free_aligned_buffer_page_end(dst_y_opt);
1286   free_aligned_buffer_page_end(dst_u_opt);
1287   free_aligned_buffer_page_end(dst_v_opt);
1288 }
1289 
// I420Blend at the benchmark size with no buffer offset and positive height.
TEST_F(LibYUVPlanarTest, I420Blend_Opt) {
  const int kInvert = +1;
  const int kOff = 0;
  TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
}
// I420Blend at the benchmark size with every buffer offset by one byte.
TEST_F(LibYUVPlanarTest, I420Blend_Unaligned) {
  const int kInvert = +1;
  const int kOff = 1;
  TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
}
1298 
1299 // TODO(fbarchard): DISABLED because _Any uses C.  Avoid C and re-enable.
TEST_F(LibYUVPlanarTest,DISABLED_I420Blend_Any)1300 TEST_F(LibYUVPlanarTest, DISABLED_I420Blend_Any) {
1301   TestI420Blend(benchmark_width_ - 4, benchmark_height_, benchmark_iterations_,
1302                 disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
1303 }
// I420Blend at the benchmark size with a negated height.
TEST_F(LibYUVPlanarTest, I420Blend_Invert) {
  const int kInvert = -1;
  const int kOff = 0;
  TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
}
1308 
// Checks ARGBAffineRow_C against hand-computed samples and, when the SSE2 row
// function is compiled in and the CPU flag is set, checks that
// ARGBAffineRow_SSE2 produces identical output and benchmarks it.
TEST_F(LibYUVPlanarTest, TestAffine) {
  SIMD_ALIGNED(uint8_t orig_pixels_0[1280][4]);
  SIMD_ALIGNED(uint8_t interpolate_pixels_C[1280][4]);

  // Every byte of source pixel i holds the low 8 bits of i.
  for (int i = 0; i < 1280; ++i) {
    for (int j = 0; j < 4; ++j) {
      orig_pixels_0[i][j] = static_cast<uint8_t>(i);
    }
  }

  // With a third element of 0.75 the expectations below correspond to
  // destination pixel x sampling source pixel floor(0.75 * x).
  float uv_step[4] = {0.f, 0.f, 0.75f, 0.f};

  ARGBAffineRow_C(&orig_pixels_0[0][0], 0, &interpolate_pixels_C[0][0], uv_step,
                  1280);
  EXPECT_EQ(0u, interpolate_pixels_C[0][0]);
  EXPECT_EQ(96u, interpolate_pixels_C[128][0]);
  EXPECT_EQ(191u, interpolate_pixels_C[255][3]);

#if defined(HAS_ARGBAFFINEROW_SSE2)
  // The SSE2 row function is called directly rather than through a dispatching
  // API, so every call (not just the benchmark loop) is guarded by the CPU
  // flag check.
  if (TestCpuFlag(kCpuHasSSE2)) {
    SIMD_ALIGNED(uint8_t interpolate_pixels_Opt[1280][4]);
    ARGBAffineRow_SSE2(&orig_pixels_0[0][0], 0, &interpolate_pixels_Opt[0][0],
                       uv_step, 1280);
    EXPECT_EQ(0,
              memcmp(interpolate_pixels_Opt, interpolate_pixels_C, 1280 * 4));

    for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
      ARGBAffineRow_SSE2(&orig_pixels_0[0][0], 0, &interpolate_pixels_Opt[0][0],
                         uv_step, 1280);
    }
  }
#endif
}
1342 
// Copies the interior of a bordered plane with CopyPlane, once per iteration
// with disable_cpu_flags_ applied and once per iteration with
// benchmark_cpu_info_ applied, and expects both destination buffers to end up
// identical.
TEST_F(LibYUVPlanarTest, TestCopyPlane) {
  const int kBorder = 12;
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kPaddedWidth = kWidth + kBorder * 2;
  const int kPlaneSize = kPaddedWidth * (kHeight + kBorder * 2);

  align_buffer_page_end(orig_y, kPlaneSize);
  align_buffer_page_end(dst_c, kPlaneSize);
  align_buffer_page_end(dst_opt, kPlaneSize);

  memset(orig_y, 0, kPlaneSize);
  memset(dst_c, 0, kPlaneSize);
  memset(dst_opt, 0, kPlaneSize);

  // Fill the interior of the source with random data; the border stays zero.
  for (int row = kBorder; row < kHeight + kBorder; ++row) {
    uint8_t* const line = orig_y + row * kPaddedWidth;
    for (int col = kBorder; col < kWidth + kBorder; ++col) {
      line[col] = fastrand() & 0xff;
    }
  }

  // Give both destinations the same random (7 bit) starting contents.
  for (int i = 0; i < kPlaneSize; ++i) {
    const uint8_t random_number = fastrand() & 0x7f;
    dst_c[i] = random_number;
    dst_opt[i] = random_number;
  }

  // Offset of the first interior byte.
  const int kOffset = kBorder * kPaddedWidth + kBorder;
  const int kSrcStride = kPaddedWidth;
  // NOTE(review): the destination stride is 8, which is smaller than the
  // copied width for typical benchmark widths, so destination rows overlap.
  // Confirm this is intended.
  const int kDstStride = 8;

  // Disable all optimizations.
  MaskCpuFlags(disable_cpu_flags_);
  for (int run = 0; run < benchmark_iterations_; run++) {
    CopyPlane(orig_y + kOffset, kSrcStride, dst_c + kOffset, kDstStride,
              kWidth, kHeight);
  }

  // Enable optimizations.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int run = 0; run < benchmark_iterations_; run++) {
    CopyPlane(orig_y + kOffset, kSrcStride, dst_opt + kOffset, kDstStride,
              kWidth, kHeight);
  }

  // Count mismatching bytes over the whole buffer, border included.
  int err = 0;
  for (int i = 0; i < kPlaneSize; ++i) {
    err += (dst_c[i] != dst_opt[i]) ? 1 : 0;
  }

  free_aligned_buffer_page_end(orig_y);
  free_aligned_buffer_page_end(dst_c);
  free_aligned_buffer_page_end(dst_opt);

  EXPECT_EQ(0, err);
}
1402 
TestMultiply(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1403 static int TestMultiply(int width,
1404                         int height,
1405                         int benchmark_iterations,
1406                         int disable_cpu_flags,
1407                         int benchmark_cpu_info,
1408                         int invert,
1409                         int off) {
1410   if (width < 1) {
1411     width = 1;
1412   }
1413   const int kBpp = 4;
1414   const int kStride = width * kBpp;
1415   align_buffer_page_end(src_argb_a, kStride * height + off);
1416   align_buffer_page_end(src_argb_b, kStride * height + off);
1417   align_buffer_page_end(dst_argb_c, kStride * height);
1418   align_buffer_page_end(dst_argb_opt, kStride * height);
1419   for (int i = 0; i < kStride * height; ++i) {
1420     src_argb_a[i + off] = (fastrand() & 0xff);
1421     src_argb_b[i + off] = (fastrand() & 0xff);
1422   }
1423   memset(dst_argb_c, 0, kStride * height);
1424   memset(dst_argb_opt, 0, kStride * height);
1425 
1426   MaskCpuFlags(disable_cpu_flags);
1427   ARGBMultiply(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
1428                kStride, width, invert * height);
1429   MaskCpuFlags(benchmark_cpu_info);
1430   for (int i = 0; i < benchmark_iterations; ++i) {
1431     ARGBMultiply(src_argb_a + off, kStride, src_argb_b + off, kStride,
1432                  dst_argb_opt, kStride, width, invert * height);
1433   }
1434   int max_diff = 0;
1435   for (int i = 0; i < kStride * height; ++i) {
1436     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1437                        static_cast<int>(dst_argb_opt[i]));
1438     if (abs_diff > max_diff) {
1439       max_diff = abs_diff;
1440     }
1441   }
1442   free_aligned_buffer_page_end(src_argb_a);
1443   free_aligned_buffer_page_end(src_argb_b);
1444   free_aligned_buffer_page_end(dst_argb_c);
1445   free_aligned_buffer_page_end(dst_argb_opt);
1446   return max_diff;
1447 }
1448 
// ARGBMultiply with a width one pixel narrower than the benchmark width; the
// two outputs may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBMultiply_Any) {
  const int kWidth = benchmark_width_ - 1;
  const int max_diff =
      TestMultiply(kWidth, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, /*invert=*/+1,
                   /*off=*/0);
  EXPECT_LE(max_diff, 1);
}
1455 
// ARGBMultiply with both source buffers offset by one byte; the two outputs
// may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBMultiply_Unaligned) {
  const int max_diff =
      TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, /*invert=*/+1,
                   /*off=*/1);
  EXPECT_LE(max_diff, 1);
}
1462 
// ARGBMultiply with a negated height; the two outputs may differ by at most 1
// per byte.
TEST_F(LibYUVPlanarTest, ARGBMultiply_Invert) {
  const int max_diff =
      TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, /*invert=*/-1,
                   /*off=*/0);
  EXPECT_LE(max_diff, 1);
}
1469 
// ARGBMultiply at the benchmark size with no offset and positive height; the
// two outputs may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBMultiply_Opt) {
  const int max_diff =
      TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, /*invert=*/+1,
                   /*off=*/0);
  EXPECT_LE(max_diff, 1);
}
1476 
TestAdd(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1477 static int TestAdd(int width,
1478                    int height,
1479                    int benchmark_iterations,
1480                    int disable_cpu_flags,
1481                    int benchmark_cpu_info,
1482                    int invert,
1483                    int off) {
1484   if (width < 1) {
1485     width = 1;
1486   }
1487   const int kBpp = 4;
1488   const int kStride = width * kBpp;
1489   align_buffer_page_end(src_argb_a, kStride * height + off);
1490   align_buffer_page_end(src_argb_b, kStride * height + off);
1491   align_buffer_page_end(dst_argb_c, kStride * height);
1492   align_buffer_page_end(dst_argb_opt, kStride * height);
1493   for (int i = 0; i < kStride * height; ++i) {
1494     src_argb_a[i + off] = (fastrand() & 0xff);
1495     src_argb_b[i + off] = (fastrand() & 0xff);
1496   }
1497   memset(dst_argb_c, 0, kStride * height);
1498   memset(dst_argb_opt, 0, kStride * height);
1499 
1500   MaskCpuFlags(disable_cpu_flags);
1501   ARGBAdd(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
1502           kStride, width, invert * height);
1503   MaskCpuFlags(benchmark_cpu_info);
1504   for (int i = 0; i < benchmark_iterations; ++i) {
1505     ARGBAdd(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_opt,
1506             kStride, width, invert * height);
1507   }
1508   int max_diff = 0;
1509   for (int i = 0; i < kStride * height; ++i) {
1510     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1511                        static_cast<int>(dst_argb_opt[i]));
1512     if (abs_diff > max_diff) {
1513       max_diff = abs_diff;
1514     }
1515   }
1516   free_aligned_buffer_page_end(src_argb_a);
1517   free_aligned_buffer_page_end(src_argb_b);
1518   free_aligned_buffer_page_end(dst_argb_c);
1519   free_aligned_buffer_page_end(dst_argb_opt);
1520   return max_diff;
1521 }
1522 
// ARGBAdd with a width one pixel narrower than the benchmark width; the two
// outputs may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBAdd_Any) {
  const int kWidth = benchmark_width_ - 1;
  const int max_diff =
      TestAdd(kWidth, benchmark_height_, benchmark_iterations_,
              disable_cpu_flags_, benchmark_cpu_info_, /*invert=*/+1,
              /*off=*/0);
  EXPECT_LE(max_diff, 1);
}
1529 
// ARGBAdd with both source buffers offset by one byte; the two outputs may
// differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBAdd_Unaligned) {
  const int max_diff =
      TestAdd(benchmark_width_, benchmark_height_, benchmark_iterations_,
              disable_cpu_flags_, benchmark_cpu_info_, /*invert=*/+1,
              /*off=*/1);
  EXPECT_LE(max_diff, 1);
}
1536 
// ARGBAdd with a negated height; the two outputs may differ by at most 1 per
// byte.
TEST_F(LibYUVPlanarTest, ARGBAdd_Invert) {
  const int max_diff =
      TestAdd(benchmark_width_, benchmark_height_, benchmark_iterations_,
              disable_cpu_flags_, benchmark_cpu_info_, /*invert=*/-1,
              /*off=*/0);
  EXPECT_LE(max_diff, 1);
}
1543 
// ARGBAdd at the benchmark size with no offset and positive height; the two
// outputs may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, ARGBAdd_Opt) {
  const int max_diff =
      TestAdd(benchmark_width_, benchmark_height_, benchmark_iterations_,
              disable_cpu_flags_, benchmark_cpu_info_, /*invert=*/+1,
              /*off=*/0);
  EXPECT_LE(max_diff, 1);
}
1550 
TestSubtract(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1551 static int TestSubtract(int width,
1552                         int height,
1553                         int benchmark_iterations,
1554                         int disable_cpu_flags,
1555                         int benchmark_cpu_info,
1556                         int invert,
1557                         int off) {
1558   if (width < 1) {
1559     width = 1;
1560   }
1561   const int kBpp = 4;
1562   const int kStride = width * kBpp;
1563   align_buffer_page_end(src_argb_a, kStride * height + off);
1564   align_buffer_page_end(src_argb_b, kStride * height + off);
1565   align_buffer_page_end(dst_argb_c, kStride * height);
1566   align_buffer_page_end(dst_argb_opt, kStride * height);
1567   for (int i = 0; i < kStride * height; ++i) {
1568     src_argb_a[i + off] = (fastrand() & 0xff);
1569     src_argb_b[i + off] = (fastrand() & 0xff);
1570   }
1571   memset(dst_argb_c, 0, kStride * height);
1572   memset(dst_argb_opt, 0, kStride * height);
1573 
1574   MaskCpuFlags(disable_cpu_flags);
1575   ARGBSubtract(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
1576                kStride, width, invert * height);
1577   MaskCpuFlags(benchmark_cpu_info);
1578   for (int i = 0; i < benchmark_iterations; ++i) {
1579     ARGBSubtract(src_argb_a + off, kStride, src_argb_b + off, kStride,
1580                  dst_argb_opt, kStride, width, invert * height);
1581   }
1582   int max_diff = 0;
1583   for (int i = 0; i < kStride * height; ++i) {
1584     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1585                        static_cast<int>(dst_argb_opt[i]));
1586     if (abs_diff > max_diff) {
1587       max_diff = abs_diff;
1588     }
1589   }
1590   free_aligned_buffer_page_end(src_argb_a);
1591   free_aligned_buffer_page_end(src_argb_b);
1592   free_aligned_buffer_page_end(dst_argb_c);
1593   free_aligned_buffer_page_end(dst_argb_opt);
1594   return max_diff;
1595 }
1596 
// Calls TestSubtract with a width of benchmark_width_ - 1 (invert = +1,
// off = 0); the returned maximum byte difference must be at most 1.
TEST_F(LibYUVPlanarTest, ARGBSubtract_Any) {
  const int kWidth = benchmark_width_ - 1;
  const int max_diff =
      TestSubtract(kWidth, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  EXPECT_LE(max_diff, 1);
}
1603 
// Calls TestSubtract with off = 1 so both source pointers are advanced by one
// byte; the returned maximum byte difference must be at most 1.
TEST_F(LibYUVPlanarTest, ARGBSubtract_Unaligned) {
  const int kOffset = 1;
  const int max_diff =
      TestSubtract(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, +1, kOffset);
  EXPECT_LE(max_diff, 1);
}
1610 
// Calls TestSubtract with invert = -1, so ARGBSubtract receives a negated
// height; the returned maximum byte difference must be at most 1.
TEST_F(LibYUVPlanarTest, ARGBSubtract_Invert) {
  const int kInvert = -1;
  const int max_diff =
      TestSubtract(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, kInvert, 0);
  EXPECT_LE(max_diff, 1);
}
1617 
// Calls TestSubtract at the full benchmark size with invert = +1 and off = 0;
// the returned maximum byte difference must be at most 1.
TEST_F(LibYUVPlanarTest, ARGBSubtract_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff =
      TestSubtract(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(max_diff, 1);
}
1624 
TestSobel(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1625 static int TestSobel(int width,
1626                      int height,
1627                      int benchmark_iterations,
1628                      int disable_cpu_flags,
1629                      int benchmark_cpu_info,
1630                      int invert,
1631                      int off) {
1632   if (width < 1) {
1633     width = 1;
1634   }
1635   const int kBpp = 4;
1636   const int kStride = width * kBpp;
1637   align_buffer_page_end(src_argb_a, kStride * height + off);
1638   align_buffer_page_end(dst_argb_c, kStride * height);
1639   align_buffer_page_end(dst_argb_opt, kStride * height);
1640   memset(src_argb_a, 0, kStride * height + off);
1641   for (int i = 0; i < kStride * height; ++i) {
1642     src_argb_a[i + off] = (fastrand() & 0xff);
1643   }
1644   memset(dst_argb_c, 0, kStride * height);
1645   memset(dst_argb_opt, 0, kStride * height);
1646 
1647   MaskCpuFlags(disable_cpu_flags);
1648   ARGBSobel(src_argb_a + off, kStride, dst_argb_c, kStride, width,
1649             invert * height);
1650   MaskCpuFlags(benchmark_cpu_info);
1651   for (int i = 0; i < benchmark_iterations; ++i) {
1652     ARGBSobel(src_argb_a + off, kStride, dst_argb_opt, kStride, width,
1653               invert * height);
1654   }
1655   int max_diff = 0;
1656   for (int i = 0; i < kStride * height; ++i) {
1657     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1658                        static_cast<int>(dst_argb_opt[i]));
1659     if (abs_diff > max_diff) {
1660       max_diff = abs_diff;
1661     }
1662   }
1663   free_aligned_buffer_page_end(src_argb_a);
1664   free_aligned_buffer_page_end(dst_argb_c);
1665   free_aligned_buffer_page_end(dst_argb_opt);
1666   return max_diff;
1667 }
1668 
// Calls TestSobel with a width of benchmark_width_ - 1 (invert = +1, off = 0)
// and requires the two outputs to match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobel_Any) {
  const int kWidth = benchmark_width_ - 1;
  const int max_diff =
      TestSobel(kWidth, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  EXPECT_EQ(0, max_diff);
}
1675 
// Calls TestSobel with off = 1 so the source pointer is advanced by one byte;
// the two outputs must match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobel_Unaligned) {
  const int kOffset = 1;
  const int max_diff =
      TestSobel(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, +1, kOffset);
  EXPECT_EQ(0, max_diff);
}
1682 
// Calls TestSobel with invert = -1, so ARGBSobel receives a negated height;
// the two outputs must match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobel_Invert) {
  const int kInvert = -1;
  const int max_diff =
      TestSobel(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, 0);
  EXPECT_EQ(0, max_diff);
}
1689 
// Calls TestSobel at the full benchmark size with invert = +1 and off = 0;
// the two outputs must match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobel_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff =
      TestSobel(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(0, max_diff);
}
1696 
TestSobelToPlane(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1697 static int TestSobelToPlane(int width,
1698                             int height,
1699                             int benchmark_iterations,
1700                             int disable_cpu_flags,
1701                             int benchmark_cpu_info,
1702                             int invert,
1703                             int off) {
1704   if (width < 1) {
1705     width = 1;
1706   }
1707   const int kSrcBpp = 4;
1708   const int kDstBpp = 1;
1709   const int kSrcStride = (width * kSrcBpp + 15) & ~15;
1710   const int kDstStride = (width * kDstBpp + 15) & ~15;
1711   align_buffer_page_end(src_argb_a, kSrcStride * height + off);
1712   align_buffer_page_end(dst_argb_c, kDstStride * height);
1713   align_buffer_page_end(dst_argb_opt, kDstStride * height);
1714   memset(src_argb_a, 0, kSrcStride * height + off);
1715   for (int i = 0; i < kSrcStride * height; ++i) {
1716     src_argb_a[i + off] = (fastrand() & 0xff);
1717   }
1718   memset(dst_argb_c, 0, kDstStride * height);
1719   memset(dst_argb_opt, 0, kDstStride * height);
1720 
1721   MaskCpuFlags(disable_cpu_flags);
1722   ARGBSobelToPlane(src_argb_a + off, kSrcStride, dst_argb_c, kDstStride, width,
1723                    invert * height);
1724   MaskCpuFlags(benchmark_cpu_info);
1725   for (int i = 0; i < benchmark_iterations; ++i) {
1726     ARGBSobelToPlane(src_argb_a + off, kSrcStride, dst_argb_opt, kDstStride,
1727                      width, invert * height);
1728   }
1729   int max_diff = 0;
1730   for (int i = 0; i < kDstStride * height; ++i) {
1731     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1732                        static_cast<int>(dst_argb_opt[i]));
1733     if (abs_diff > max_diff) {
1734       max_diff = abs_diff;
1735     }
1736   }
1737   free_aligned_buffer_page_end(src_argb_a);
1738   free_aligned_buffer_page_end(dst_argb_c);
1739   free_aligned_buffer_page_end(dst_argb_opt);
1740   return max_diff;
1741 }
1742 
// Calls TestSobelToPlane with a width of benchmark_width_ - 1 (invert = +1,
// off = 0) and requires the two outputs to match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Any) {
  const int kWidth = benchmark_width_ - 1;
  const int max_diff =
      TestSobelToPlane(kWidth, benchmark_height_, benchmark_iterations_,
                       disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  EXPECT_EQ(0, max_diff);
}
1749 
// Calls TestSobelToPlane with off = 1 so the source pointer is advanced by one
// byte; the two outputs must match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Unaligned) {
  const int kOffset = 1;
  const int max_diff = TestSobelToPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, +1, kOffset);
  EXPECT_EQ(0, max_diff);
}
1756 
// Calls TestSobelToPlane with invert = -1, so ARGBSobelToPlane receives a
// negated height; the two outputs must match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Invert) {
  const int kInvert = -1;
  const int max_diff = TestSobelToPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, 0);
  EXPECT_EQ(0, max_diff);
}
1763 
// Calls TestSobelToPlane at the full benchmark size with invert = +1 and
// off = 0; the two outputs must match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff = TestSobelToPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(0, max_diff);
}
1770 
TestSobelXY(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1771 static int TestSobelXY(int width,
1772                        int height,
1773                        int benchmark_iterations,
1774                        int disable_cpu_flags,
1775                        int benchmark_cpu_info,
1776                        int invert,
1777                        int off) {
1778   if (width < 1) {
1779     width = 1;
1780   }
1781   const int kBpp = 4;
1782   const int kStride = width * kBpp;
1783   align_buffer_page_end(src_argb_a, kStride * height + off);
1784   align_buffer_page_end(dst_argb_c, kStride * height);
1785   align_buffer_page_end(dst_argb_opt, kStride * height);
1786   memset(src_argb_a, 0, kStride * height + off);
1787   for (int i = 0; i < kStride * height; ++i) {
1788     src_argb_a[i + off] = (fastrand() & 0xff);
1789   }
1790   memset(dst_argb_c, 0, kStride * height);
1791   memset(dst_argb_opt, 0, kStride * height);
1792 
1793   MaskCpuFlags(disable_cpu_flags);
1794   ARGBSobelXY(src_argb_a + off, kStride, dst_argb_c, kStride, width,
1795               invert * height);
1796   MaskCpuFlags(benchmark_cpu_info);
1797   for (int i = 0; i < benchmark_iterations; ++i) {
1798     ARGBSobelXY(src_argb_a + off, kStride, dst_argb_opt, kStride, width,
1799                 invert * height);
1800   }
1801   int max_diff = 0;
1802   for (int i = 0; i < kStride * height; ++i) {
1803     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1804                        static_cast<int>(dst_argb_opt[i]));
1805     if (abs_diff > max_diff) {
1806       max_diff = abs_diff;
1807     }
1808   }
1809   free_aligned_buffer_page_end(src_argb_a);
1810   free_aligned_buffer_page_end(dst_argb_c);
1811   free_aligned_buffer_page_end(dst_argb_opt);
1812   return max_diff;
1813 }
1814 
// Calls TestSobelXY with a width of benchmark_width_ - 1 (invert = +1,
// off = 0) and requires the two outputs to match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Any) {
  const int kWidth = benchmark_width_ - 1;
  const int max_diff =
      TestSobelXY(kWidth, benchmark_height_, benchmark_iterations_,
                  disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  EXPECT_EQ(0, max_diff);
}
1821 
// Calls TestSobelXY with off = 1 so the source pointer is advanced by one
// byte; the two outputs must match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Unaligned) {
  const int kOffset = 1;
  const int max_diff =
      TestSobelXY(benchmark_width_, benchmark_height_, benchmark_iterations_,
                  disable_cpu_flags_, benchmark_cpu_info_, +1, kOffset);
  EXPECT_EQ(0, max_diff);
}
1828 
// Calls TestSobelXY with invert = -1, so ARGBSobelXY receives a negated
// height; the two outputs must match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Invert) {
  const int kInvert = -1;
  const int max_diff =
      TestSobelXY(benchmark_width_, benchmark_height_, benchmark_iterations_,
                  disable_cpu_flags_, benchmark_cpu_info_, kInvert, 0);
  EXPECT_EQ(0, max_diff);
}
1835 
// Calls TestSobelXY at the full benchmark size with invert = +1 and off = 0;
// the two outputs must match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff =
      TestSobelXY(benchmark_width_, benchmark_height_, benchmark_iterations_,
                  disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(0, max_diff);
}
1842 
TestBlur(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off,int radius)1843 static int TestBlur(int width,
1844                     int height,
1845                     int benchmark_iterations,
1846                     int disable_cpu_flags,
1847                     int benchmark_cpu_info,
1848                     int invert,
1849                     int off,
1850                     int radius) {
1851   if (width < 1) {
1852     width = 1;
1853   }
1854   const int kBpp = 4;
1855   const int kStride = width * kBpp;
1856   align_buffer_page_end(src_argb_a, kStride * height + off);
1857   align_buffer_page_end(dst_cumsum, width * height * 16);
1858   align_buffer_page_end(dst_argb_c, kStride * height);
1859   align_buffer_page_end(dst_argb_opt, kStride * height);
1860   for (int i = 0; i < kStride * height; ++i) {
1861     src_argb_a[i + off] = (fastrand() & 0xff);
1862   }
1863   memset(dst_cumsum, 0, width * height * 16);
1864   memset(dst_argb_c, 0, kStride * height);
1865   memset(dst_argb_opt, 0, kStride * height);
1866 
1867   MaskCpuFlags(disable_cpu_flags);
1868   ARGBBlur(src_argb_a + off, kStride, dst_argb_c, kStride,
1869            reinterpret_cast<int32_t*>(dst_cumsum), width * 4, width,
1870            invert * height, radius);
1871   MaskCpuFlags(benchmark_cpu_info);
1872   for (int i = 0; i < benchmark_iterations; ++i) {
1873     ARGBBlur(src_argb_a + off, kStride, dst_argb_opt, kStride,
1874              reinterpret_cast<int32_t*>(dst_cumsum), width * 4, width,
1875              invert * height, radius);
1876   }
1877   int max_diff = 0;
1878   for (int i = 0; i < kStride * height; ++i) {
1879     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1880                        static_cast<int>(dst_argb_opt[i]));
1881     if (abs_diff > max_diff) {
1882       max_diff = abs_diff;
1883     }
1884   }
1885   free_aligned_buffer_page_end(src_argb_a);
1886   free_aligned_buffer_page_end(dst_cumsum);
1887   free_aligned_buffer_page_end(dst_argb_c);
1888   free_aligned_buffer_page_end(dst_argb_opt);
1889   return max_diff;
1890 }
1891 
// Radius passed to TestBlur by the ARGBBlur_* tests.
static const int kBlurSize = 55;

// Calls TestBlur with a width of benchmark_width_ - 1 and radius kBlurSize
// (invert = +1, off = 0); the returned maximum difference must be at most 1.
TEST_F(LibYUVPlanarTest, ARGBBlur_Any) {
  const int kWidth = benchmark_width_ - 1;
  const int max_diff =
      TestBlur(kWidth, benchmark_height_, benchmark_iterations_,
               disable_cpu_flags_, benchmark_cpu_info_, +1, 0, kBlurSize);
  EXPECT_LE(max_diff, 1);
}
1899 
// Calls TestBlur with off = 1 and radius kBlurSize so the source pointer is
// advanced by one byte; the returned maximum difference must be at most 1.
TEST_F(LibYUVPlanarTest, ARGBBlur_Unaligned) {
  const int kOffset = 1;
  const int max_diff =
      TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
               disable_cpu_flags_, benchmark_cpu_info_, +1, kOffset, kBlurSize);
  EXPECT_LE(max_diff, 1);
}
1906 
// Calls TestBlur with invert = -1 and radius kBlurSize, so ARGBBlur receives
// a negated height; the returned maximum difference must be at most 1.
TEST_F(LibYUVPlanarTest, ARGBBlur_Invert) {
  const int kInvert = -1;
  const int max_diff =
      TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
               disable_cpu_flags_, benchmark_cpu_info_, kInvert, 0, kBlurSize);
  EXPECT_LE(max_diff, 1);
}
1913 
// Calls TestBlur at the full benchmark size with radius kBlurSize, invert = +1
// and off = 0; the returned maximum difference must be at most 1.
TEST_F(LibYUVPlanarTest, ARGBBlur_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff = TestBlur(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset, kBlurSize);
  EXPECT_LE(max_diff, 1);
}
1920 
// Radius passed to TestBlur by the ARGBBlurSmall_* tests.
static const int kBlurSmallSize = 5;

// Calls TestBlur with a width of benchmark_width_ - 1 and radius
// kBlurSmallSize (invert = +1, off = 0); the returned maximum difference must
// be at most 1.
TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Any) {
  const int kWidth = benchmark_width_ - 1;
  const int max_diff =
      TestBlur(kWidth, benchmark_height_, benchmark_iterations_,
               disable_cpu_flags_, benchmark_cpu_info_, +1, 0, kBlurSmallSize);
  EXPECT_LE(max_diff, 1);
}
1928 
// Calls TestBlur with off = 1 and radius kBlurSmallSize so the source pointer
// is advanced by one byte; the returned maximum difference must be at most 1.
TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Unaligned) {
  const int kOffset = 1;
  const int max_diff = TestBlur(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, +1, kOffset, kBlurSmallSize);
  EXPECT_LE(max_diff, 1);
}
1935 
// Calls TestBlur with invert = -1 and radius kBlurSmallSize, so ARGBBlur
// receives a negated height; the returned maximum difference must be at most 1.
TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Invert) {
  const int kInvert = -1;
  const int max_diff = TestBlur(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, 0, kBlurSmallSize);
  EXPECT_LE(max_diff, 1);
}
1942 
// Calls TestBlur at the full benchmark size with radius kBlurSmallSize,
// invert = +1 and off = 0; the returned maximum difference must be at most 1.
TEST_F(LibYUVPlanarTest, ARGBBlurSmall_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff = TestBlur(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset,
      kBlurSmallSize);
  EXPECT_LE(max_diff, 1);
}
1949 
TEST_F(LibYUVPlanarTest,TestARGBPolynomial)1950 TEST_F(LibYUVPlanarTest, TestARGBPolynomial) {
1951   SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
1952   SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]);
1953   SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]);
1954   memset(orig_pixels, 0, sizeof(orig_pixels));
1955 
1956   SIMD_ALIGNED(static const float kWarmifyPolynomial[16]) = {
1957       0.94230f,  -3.03300f,    -2.92500f,    0.f,  // C0
1958       0.584500f, 1.112000f,    1.535000f,    1.f,  // C1 x
1959       0.001313f, -0.002503f,   -0.004496f,   0.f,  // C2 x * x
1960       0.0f,      0.000006965f, 0.000008781f, 0.f,  // C3 x * x * x
1961   };
1962 
1963   // Test blue
1964   orig_pixels[0][0] = 255u;
1965   orig_pixels[0][1] = 0u;
1966   orig_pixels[0][2] = 0u;
1967   orig_pixels[0][3] = 128u;
1968   // Test green
1969   orig_pixels[1][0] = 0u;
1970   orig_pixels[1][1] = 255u;
1971   orig_pixels[1][2] = 0u;
1972   orig_pixels[1][3] = 0u;
1973   // Test red
1974   orig_pixels[2][0] = 0u;
1975   orig_pixels[2][1] = 0u;
1976   orig_pixels[2][2] = 255u;
1977   orig_pixels[2][3] = 255u;
1978   // Test white
1979   orig_pixels[3][0] = 255u;
1980   orig_pixels[3][1] = 255u;
1981   orig_pixels[3][2] = 255u;
1982   orig_pixels[3][3] = 255u;
1983   // Test color
1984   orig_pixels[4][0] = 16u;
1985   orig_pixels[4][1] = 64u;
1986   orig_pixels[4][2] = 192u;
1987   orig_pixels[4][3] = 224u;
1988   // Do 16 to test asm version.
1989   ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
1990                  &kWarmifyPolynomial[0], 16, 1);
1991   EXPECT_EQ(235u, dst_pixels_opt[0][0]);
1992   EXPECT_EQ(0u, dst_pixels_opt[0][1]);
1993   EXPECT_EQ(0u, dst_pixels_opt[0][2]);
1994   EXPECT_EQ(128u, dst_pixels_opt[0][3]);
1995   EXPECT_EQ(0u, dst_pixels_opt[1][0]);
1996   EXPECT_EQ(233u, dst_pixels_opt[1][1]);
1997   EXPECT_EQ(0u, dst_pixels_opt[1][2]);
1998   EXPECT_EQ(0u, dst_pixels_opt[1][3]);
1999   EXPECT_EQ(0u, dst_pixels_opt[2][0]);
2000   EXPECT_EQ(0u, dst_pixels_opt[2][1]);
2001   EXPECT_EQ(241u, dst_pixels_opt[2][2]);
2002   EXPECT_EQ(255u, dst_pixels_opt[2][3]);
2003   EXPECT_EQ(235u, dst_pixels_opt[3][0]);
2004   EXPECT_EQ(233u, dst_pixels_opt[3][1]);
2005   EXPECT_EQ(241u, dst_pixels_opt[3][2]);
2006   EXPECT_EQ(255u, dst_pixels_opt[3][3]);
2007   EXPECT_EQ(10u, dst_pixels_opt[4][0]);
2008   EXPECT_EQ(59u, dst_pixels_opt[4][1]);
2009   EXPECT_EQ(188u, dst_pixels_opt[4][2]);
2010   EXPECT_EQ(224u, dst_pixels_opt[4][3]);
2011 
2012   for (int i = 0; i < 1280; ++i) {
2013     orig_pixels[i][0] = i;
2014     orig_pixels[i][1] = i / 2;
2015     orig_pixels[i][2] = i / 3;
2016     orig_pixels[i][3] = i;
2017   }
2018 
2019   MaskCpuFlags(disable_cpu_flags_);
2020   ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0,
2021                  &kWarmifyPolynomial[0], 1280, 1);
2022   MaskCpuFlags(benchmark_cpu_info_);
2023 
2024   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
2025     ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
2026                    &kWarmifyPolynomial[0], 1280, 1);
2027   }
2028 
2029   for (int i = 0; i < 1280; ++i) {
2030     EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]);
2031     EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]);
2032     EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]);
2033     EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]);
2034   }
2035 }
2036 
TestHalfFloatPlane(int benchmark_width,int benchmark_height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,float scale,int mask)2037 int TestHalfFloatPlane(int benchmark_width,
2038                        int benchmark_height,
2039                        int benchmark_iterations,
2040                        int disable_cpu_flags,
2041                        int benchmark_cpu_info,
2042                        float scale,
2043                        int mask) {
2044   int i, j;
2045   const int y_plane_size = benchmark_width * benchmark_height * 2;
2046 
2047   align_buffer_page_end(orig_y, y_plane_size * 3);
2048   uint8_t* dst_opt = orig_y + y_plane_size;
2049   uint8_t* dst_c = orig_y + y_plane_size * 2;
2050 
2051   MemRandomize(orig_y, y_plane_size);
2052   memset(dst_c, 0, y_plane_size);
2053   memset(dst_opt, 1, y_plane_size);
2054 
2055   for (i = 0; i < y_plane_size / 2; ++i) {
2056     reinterpret_cast<uint16_t*>(orig_y)[i] &= mask;
2057   }
2058 
2059   // Disable all optimizations.
2060   MaskCpuFlags(disable_cpu_flags);
2061   for (j = 0; j < benchmark_iterations; j++) {
2062     HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y), benchmark_width * 2,
2063                    reinterpret_cast<uint16_t*>(dst_c), benchmark_width * 2,
2064                    scale, benchmark_width, benchmark_height);
2065   }
2066 
2067   // Enable optimizations.
2068   MaskCpuFlags(benchmark_cpu_info);
2069   for (j = 0; j < benchmark_iterations; j++) {
2070     HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y), benchmark_width * 2,
2071                    reinterpret_cast<uint16_t*>(dst_opt), benchmark_width * 2,
2072                    scale, benchmark_width, benchmark_height);
2073   }
2074 
2075   int max_diff = 0;
2076   for (i = 0; i < y_plane_size / 2; ++i) {
2077     int abs_diff =
2078         abs(static_cast<int>(reinterpret_cast<uint16_t*>(dst_c)[i]) -
2079             static_cast<int>(reinterpret_cast<uint16_t*>(dst_opt)[i]));
2080     if (abs_diff > max_diff) {
2081       max_diff = abs_diff;
2082     }
2083   }
2084 
2085   free_aligned_buffer_page_end(orig_y);
2086   return max_diff;
2087 }
2088 
#if defined(__arm__)
// Reads FPSCR, ORs in bit 24 (0x1000000) and writes the result back.
// In the ARM architecture that bit is FZ (flush-to-zero), which matches the
// function name. Only compiled for 32-bit ARM builds.
// NOTE(review): nothing visible here clears the bit again, so the setting
// appears to persist after the call — confirm this is intended.
static void EnableFlushDenormalToZero(void) {
  uint32_t fpscr_bits;  // Register operand; holds the FPSCR value being edited.
  __asm__ __volatile__(
      "vmrs   %0, fpscr         \n"
      "orr    %0, %0, #0x1000000        \n"
      "vmsr   fpscr, %0         \n"
      : "=r"(fpscr_bits)::"memory");
}
#endif
2099 
// 5 bit exponent with bias of 15 will underflow to a denormal if scale causes
// exponent to be less than 0.  15 - log2(65536) = -1.  This shouldn't normally
// happen since scale is 1/(1<<bits) where bits is 9, 10 or 12.
2103 
// Runs TestHalfFloatPlane with scale 1/65536 and the full 16-bit mask and
// requires the two outputs to match exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_denormal) {
// 32 bit arm rounding on denormal case is off by 1 compared to C.
#if defined(__arm__)
  EnableFlushDenormalToZero();
#endif
  const float kScale = 1.0f / 65536.0f;
  const int kMask = 65535;
  const int diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, diff);
}
2114 
// Runs TestHalfFloatPlane with scale 1.0 and the full 16-bit mask; the
// outputs may differ by at most 1.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_One) {
  const float kScale = 1.0f;
  const int kMask = 65535;
  const int diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_LE(diff, 1);
}
2121 
// Runs TestHalfFloatPlane with scale 1/4096 and the full 16-bit mask and
// requires the two outputs to match exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_Opt) {
  const float kScale = 1.0f / 4096.0f;
  const int kMask = 65535;
  const int diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, diff);
}
2128 
// Runs TestHalfFloatPlane with scale 1/1024 and samples masked to 10 bits
// (1023); the two outputs must match exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_Opt) {
  const float kScale = 1.0f / 1024.0f;
  const int kMask = 1023;
  const int diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, diff);
}
2135 
// Runs TestHalfFloatPlane with scale 1/512 and samples masked to 9 bits (511);
// the two outputs must match exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_9bit_Opt) {
  const float kScale = 1.0f / 512.0f;
  const int kMask = 511;
  const int diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, diff);
}
2142 
// Runs TestHalfFloatPlane with scale 1/4096 and samples masked to 12 bits
// (4095); the two outputs must match exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Opt) {
  const float kScale = 1.0f / 4096.0f;
  const int kMask = 4095;
  const int diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, diff);
}
2149 
// Runs TestHalfFloatPlane with scale 1/4095 (not a power of two) and samples
// masked to 12 bits (4095); the two outputs must match exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Offby1) {
  const float kScale = 1.0f / 4095.0f;
  const int kMask = 4095;
  const int diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, diff);
}
2156 
// Runs TestHalfFloatPlane with scale 1.0 and samples masked to 11 bits (2047);
// the two outputs must match exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_One) {
  const float kScale = 1.0f;
  const int kMask = 2047;
  const int diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, diff);
}
2163 
// Runs TestHalfFloatPlane with scale 1.0 and samples masked to 12 bits (4095);
// the outputs may differ by at most 1.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_12bit_One) {
  const float kScale = 1.0f;
  const int kMask = 4095;
  const int diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_LE(diff, 1);
}
2170 
TestByteToFloat(int benchmark_width,int benchmark_height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,float scale)2171 float TestByteToFloat(int benchmark_width,
2172                       int benchmark_height,
2173                       int benchmark_iterations,
2174                       int disable_cpu_flags,
2175                       int benchmark_cpu_info,
2176                       float scale) {
2177   int i, j;
2178   const int y_plane_size = benchmark_width * benchmark_height;
2179 
2180   align_buffer_page_end(orig_y, y_plane_size * (1 + 4 + 4));
2181   float* dst_opt = reinterpret_cast<float*>(orig_y + y_plane_size);
2182   float* dst_c = reinterpret_cast<float*>(orig_y + y_plane_size * 5);
2183 
2184   MemRandomize(orig_y, y_plane_size);
2185   memset(dst_c, 0, y_plane_size * 4);
2186   memset(dst_opt, 1, y_plane_size * 4);
2187 
2188   // Disable all optimizations.
2189   MaskCpuFlags(disable_cpu_flags);
2190   ByteToFloat(orig_y, dst_c, scale, y_plane_size);
2191 
2192   // Enable optimizations.
2193   MaskCpuFlags(benchmark_cpu_info);
2194   for (j = 0; j < benchmark_iterations; j++) {
2195     ByteToFloat(orig_y, dst_opt, scale, y_plane_size);
2196   }
2197 
2198   float max_diff = 0;
2199   for (i = 0; i < y_plane_size; ++i) {
2200     float abs_diff = fabs(dst_c[i] - dst_opt[i]);
2201     if (abs_diff > max_diff) {
2202       max_diff = abs_diff;
2203     }
2204   }
2205 
2206   free_aligned_buffer_page_end(orig_y);
2207   return max_diff;
2208 }
2209 
// Runs the TestByteToFloat helper with scale 1.0 and requires the two float
// outputs to be exactly equal (maximum difference of 0).
TEST_F(LibYUVPlanarTest, TestByteToFloat) {
  const float kScale = 1.0f;
  const float diff = TestByteToFloat(benchmark_width_, benchmark_height_,
                                     benchmark_iterations_, disable_cpu_flags_,
                                     benchmark_cpu_info_, kScale);
  EXPECT_EQ(0.f, diff);
}
2216 
TEST_F(LibYUVPlanarTest,TestARGBLumaColorTable)2217 TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) {
2218   SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
2219   SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]);
2220   SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]);
2221   memset(orig_pixels, 0, sizeof(orig_pixels));
2222 
2223   align_buffer_page_end(lumacolortable, 32768);
2224   int v = 0;
2225   for (int i = 0; i < 32768; ++i) {
2226     lumacolortable[i] = v;
2227     v += 3;
2228   }
2229   // Test blue
2230   orig_pixels[0][0] = 255u;
2231   orig_pixels[0][1] = 0u;
2232   orig_pixels[0][2] = 0u;
2233   orig_pixels[0][3] = 128u;
2234   // Test green
2235   orig_pixels[1][0] = 0u;
2236   orig_pixels[1][1] = 255u;
2237   orig_pixels[1][2] = 0u;
2238   orig_pixels[1][3] = 0u;
2239   // Test red
2240   orig_pixels[2][0] = 0u;
2241   orig_pixels[2][1] = 0u;
2242   orig_pixels[2][2] = 255u;
2243   orig_pixels[2][3] = 255u;
2244   // Test color
2245   orig_pixels[3][0] = 16u;
2246   orig_pixels[3][1] = 64u;
2247   orig_pixels[3][2] = 192u;
2248   orig_pixels[3][3] = 224u;
2249   // Do 16 to test asm version.
2250   ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
2251                      &lumacolortable[0], 16, 1);
2252   EXPECT_EQ(253u, dst_pixels_opt[0][0]);
2253   EXPECT_EQ(0u, dst_pixels_opt[0][1]);
2254   EXPECT_EQ(0u, dst_pixels_opt[0][2]);
2255   EXPECT_EQ(128u, dst_pixels_opt[0][3]);
2256   EXPECT_EQ(0u, dst_pixels_opt[1][0]);
2257   EXPECT_EQ(253u, dst_pixels_opt[1][1]);
2258   EXPECT_EQ(0u, dst_pixels_opt[1][2]);
2259   EXPECT_EQ(0u, dst_pixels_opt[1][3]);
2260   EXPECT_EQ(0u, dst_pixels_opt[2][0]);
2261   EXPECT_EQ(0u, dst_pixels_opt[2][1]);
2262   EXPECT_EQ(253u, dst_pixels_opt[2][2]);
2263   EXPECT_EQ(255u, dst_pixels_opt[2][3]);
2264   EXPECT_EQ(48u, dst_pixels_opt[3][0]);
2265   EXPECT_EQ(192u, dst_pixels_opt[3][1]);
2266   EXPECT_EQ(64u, dst_pixels_opt[3][2]);
2267   EXPECT_EQ(224u, dst_pixels_opt[3][3]);
2268 
2269   for (int i = 0; i < 1280; ++i) {
2270     orig_pixels[i][0] = i;
2271     orig_pixels[i][1] = i / 2;
2272     orig_pixels[i][2] = i / 3;
2273     orig_pixels[i][3] = i;
2274   }
2275 
2276   MaskCpuFlags(disable_cpu_flags_);
2277   ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0,
2278                      lumacolortable, 1280, 1);
2279   MaskCpuFlags(benchmark_cpu_info_);
2280 
2281   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
2282     ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
2283                        lumacolortable, 1280, 1);
2284   }
2285   for (int i = 0; i < 1280; ++i) {
2286     EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]);
2287     EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]);
2288     EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]);
2289     EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]);
2290   }
2291 
2292   free_aligned_buffer_page_end(lumacolortable);
2293 }
2294 
// Runs ARGBCopyAlpha once with disable_cpu_flags_ masked in and then
// benchmark_iterations_ times with benchmark_cpu_info_, starting from
// identical destination contents, and expects byte-identical results.
TEST_F(LibYUVPlanarTest, TestARGBCopyAlpha) {
  const int kStride = benchmark_width_ * 4;
  const int kSize = benchmark_width_ * benchmark_height_ * 4;
  align_buffer_page_end(orig_pixels, kSize);
  align_buffer_page_end(dst_pixels_opt, kSize);
  align_buffer_page_end(dst_pixels_c, kSize);

  // Random source. Both destinations start from the same random bytes so any
  // byte the function leaves untouched still compares equal.
  MemRandomize(orig_pixels, kSize);
  MemRandomize(dst_pixels_opt, kSize);
  memcpy(dst_pixels_c, dst_pixels_opt, kSize);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBCopyAlpha(orig_pixels, kStride, dst_pixels_c, kStride, benchmark_width_,
                benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  // Benchmarked pass.
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    ARGBCopyAlpha(orig_pixels, kStride, dst_pixels_opt, kStride,
                  benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < kSize; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(dst_pixels_c);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(orig_pixels);
}
2322 
// Runs ARGBExtractAlpha once with disable_cpu_flags_ masked in and then
// benchmark_iterations_ times with benchmark_cpu_info_, and expects the two
// single-byte-per-pixel outputs to be identical.
TEST_F(LibYUVPlanarTest, TestARGBExtractAlpha) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kSrcStride = benchmark_width_ * 4;
  const int kDstStride = benchmark_width_;
  align_buffer_page_end(src_pixels, kPixels * 4);
  align_buffer_page_end(dst_pixels_opt, kPixels);
  align_buffer_page_end(dst_pixels_c, kPixels);

  // Random source; both destinations start from the same random bytes.
  MemRandomize(src_pixels, kPixels * 4);
  MemRandomize(dst_pixels_opt, kPixels);
  memcpy(dst_pixels_c, dst_pixels_opt, kPixels);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBExtractAlpha(src_pixels, kSrcStride, dst_pixels_c, kDstStride,
                   benchmark_width_, benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  // Benchmarked pass.
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    ARGBExtractAlpha(src_pixels, kSrcStride, dst_pixels_opt, kDstStride,
                     benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < kPixels; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(dst_pixels_c);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(src_pixels);
}
2350 
// Runs ARGBCopyYToAlpha once with disable_cpu_flags_ masked in and then
// benchmark_iterations_ times with benchmark_cpu_info_, starting from
// identical destination contents, and expects byte-identical results.
TEST_F(LibYUVPlanarTest, TestARGBCopyYToAlpha) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kSrcStride = benchmark_width_;
  const int kDstStride = benchmark_width_ * 4;
  align_buffer_page_end(orig_pixels, kPixels);
  align_buffer_page_end(dst_pixels_opt, kPixels * 4);
  align_buffer_page_end(dst_pixels_c, kPixels * 4);

  // Random source. Both destinations start from the same random bytes so any
  // byte the function leaves untouched still compares equal.
  MemRandomize(orig_pixels, kPixels);
  MemRandomize(dst_pixels_opt, kPixels * 4);
  memcpy(dst_pixels_c, dst_pixels_opt, kPixels * 4);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBCopyYToAlpha(orig_pixels, kSrcStride, dst_pixels_c, kDstStride,
                   benchmark_width_, benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  // Benchmarked pass.
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    ARGBCopyYToAlpha(orig_pixels, kSrcStride, dst_pixels_opt, kDstStride,
                     benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < kPixels * 4; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(dst_pixels_c);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(orig_pixels);
}
2378 
TestARGBRect(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off,int bpp)2379 static int TestARGBRect(int width,
2380                         int height,
2381                         int benchmark_iterations,
2382                         int disable_cpu_flags,
2383                         int benchmark_cpu_info,
2384                         int invert,
2385                         int off,
2386                         int bpp) {
2387   if (width < 1) {
2388     width = 1;
2389   }
2390   const int kStride = width * bpp;
2391   const int kSize = kStride * height;
2392   const uint32_t v32 = fastrand() & (bpp == 4 ? 0xffffffff : 0xff);
2393 
2394   align_buffer_page_end(dst_argb_c, kSize + off);
2395   align_buffer_page_end(dst_argb_opt, kSize + off);
2396 
2397   MemRandomize(dst_argb_c + off, kSize);
2398   memcpy(dst_argb_opt + off, dst_argb_c + off, kSize);
2399 
2400   MaskCpuFlags(disable_cpu_flags);
2401   if (bpp == 4) {
2402     ARGBRect(dst_argb_c + off, kStride, 0, 0, width, invert * height, v32);
2403   } else {
2404     SetPlane(dst_argb_c + off, kStride, width, invert * height, v32);
2405   }
2406 
2407   MaskCpuFlags(benchmark_cpu_info);
2408   for (int i = 0; i < benchmark_iterations; ++i) {
2409     if (bpp == 4) {
2410       ARGBRect(dst_argb_opt + off, kStride, 0, 0, width, invert * height, v32);
2411     } else {
2412       SetPlane(dst_argb_opt + off, kStride, width, invert * height, v32);
2413     }
2414   }
2415   int max_diff = 0;
2416   for (int i = 0; i < kStride * height; ++i) {
2417     int abs_diff = abs(static_cast<int>(dst_argb_c[i + off]) -
2418                        static_cast<int>(dst_argb_opt[i + off]));
2419     if (abs_diff > max_diff) {
2420       max_diff = abs_diff;
2421     }
2422   }
2423   free_aligned_buffer_page_end(dst_argb_c);
2424   free_aligned_buffer_page_end(dst_argb_opt);
2425   return max_diff;
2426 }
2427 
// ARGBRect with a width one less than the benchmark width, no offset.
TEST_F(LibYUVPlanarTest, ARGBRect_Any) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int kBpp = 4;
  int max_diff = TestARGBRect(benchmark_width_ - 1, benchmark_height_,
                              benchmark_iterations_, disable_cpu_flags_,
                              benchmark_cpu_info_, kInvert, kOffset, kBpp);
  EXPECT_EQ(0, max_diff);
}
2434 
// ARGBRect at full benchmark size with the destination shifted by one byte.
TEST_F(LibYUVPlanarTest, ARGBRect_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int kBpp = 4;
  int max_diff = TestARGBRect(benchmark_width_, benchmark_height_,
                              benchmark_iterations_, disable_cpu_flags_,
                              benchmark_cpu_info_, kInvert, kOffset, kBpp);
  EXPECT_EQ(0, max_diff);
}
2441 
// ARGBRect at full benchmark size with a negated height passed to the fill.
TEST_F(LibYUVPlanarTest, ARGBRect_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int kBpp = 4;
  int max_diff = TestARGBRect(benchmark_width_, benchmark_height_,
                              benchmark_iterations_, disable_cpu_flags_,
                              benchmark_cpu_info_, kInvert, kOffset, kBpp);
  EXPECT_EQ(0, max_diff);
}
2448 
// ARGBRect at full benchmark size, no offset, positive height.
TEST_F(LibYUVPlanarTest, ARGBRect_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int kBpp = 4;
  int max_diff = TestARGBRect(benchmark_width_, benchmark_height_,
                              benchmark_iterations_, disable_cpu_flags_,
                              benchmark_cpu_info_, kInvert, kOffset, kBpp);
  EXPECT_EQ(0, max_diff);
}
2455 
// SetPlane (bpp 1) with a width one less than the benchmark width.
TEST_F(LibYUVPlanarTest, SetPlane_Any) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int kBpp = 1;
  int max_diff = TestARGBRect(benchmark_width_ - 1, benchmark_height_,
                              benchmark_iterations_, disable_cpu_flags_,
                              benchmark_cpu_info_, kInvert, kOffset, kBpp);
  EXPECT_EQ(0, max_diff);
}
2462 
// SetPlane (bpp 1) at full benchmark size, destination shifted by one byte.
TEST_F(LibYUVPlanarTest, SetPlane_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int kBpp = 1;
  int max_diff = TestARGBRect(benchmark_width_, benchmark_height_,
                              benchmark_iterations_, disable_cpu_flags_,
                              benchmark_cpu_info_, kInvert, kOffset, kBpp);
  EXPECT_EQ(0, max_diff);
}
2469 
// SetPlane (bpp 1) at full benchmark size with a negated height.
TEST_F(LibYUVPlanarTest, SetPlane_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int kBpp = 1;
  int max_diff = TestARGBRect(benchmark_width_, benchmark_height_,
                              benchmark_iterations_, disable_cpu_flags_,
                              benchmark_cpu_info_, kInvert, kOffset, kBpp);
  EXPECT_EQ(0, max_diff);
}
2476 
// SetPlane (bpp 1) at full benchmark size, no offset, positive height.
TEST_F(LibYUVPlanarTest, SetPlane_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int kBpp = 1;
  int max_diff = TestARGBRect(benchmark_width_, benchmark_height_,
                              benchmark_iterations_, disable_cpu_flags_,
                              benchmark_cpu_info_, kInvert, kOffset, kBpp);
  EXPECT_EQ(0, max_diff);
}
2483 
// Benchmarks MergeUVPlane. A split-then-merge round trip is computed under
// disable_cpu_flags_; the split is then redone under benchmark_cpu_info_ and
// the merge is repeated benchmark_iterations_ times. Both merged outputs must
// be byte-identical.
TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kPlaneStride = benchmark_width_;
  const int kUVStride = benchmark_width_ * 2;
  align_buffer_page_end(src_pixels, kPixels * 2);
  align_buffer_page_end(tmp_pixels_u, kPixels);
  align_buffer_page_end(tmp_pixels_v, kPixels);
  align_buffer_page_end(dst_pixels_opt, kPixels * 2);
  align_buffer_page_end(dst_pixels_c, kPixels * 2);

  MemRandomize(src_pixels, kPixels * 2);
  MemRandomize(tmp_pixels_u, kPixels);
  MemRandomize(tmp_pixels_v, kPixels);
  MemRandomize(dst_pixels_opt, kPixels * 2);
  MemRandomize(dst_pixels_c, kPixels * 2);

  // Reference round trip.
  MaskCpuFlags(disable_cpu_flags_);
  SplitUVPlane(src_pixels, kUVStride, tmp_pixels_u, kPlaneStride, tmp_pixels_v,
               kPlaneStride, benchmark_width_, benchmark_height_);
  MergeUVPlane(tmp_pixels_u, kPlaneStride, tmp_pixels_v, kPlaneStride,
               dst_pixels_c, kUVStride, benchmark_width_, benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  // Regenerate the planes with the benchmark flags before timing the merge.
  SplitUVPlane(src_pixels, kUVStride, tmp_pixels_u, kPlaneStride, tmp_pixels_v,
               kPlaneStride, benchmark_width_, benchmark_height_);

  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    MergeUVPlane(tmp_pixels_u, kPlaneStride, tmp_pixels_v, kPlaneStride,
                 dst_pixels_opt, kUVStride, benchmark_width_,
                 benchmark_height_);
  }

  for (int i = 0; i < kPixels * 2; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(tmp_pixels_u);
  free_aligned_buffer_page_end(tmp_pixels_v);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
2527 
// Benchmarks SplitUVPlane. A split-then-merge round trip is computed under
// disable_cpu_flags_; the split is then repeated benchmark_iterations_ times
// under benchmark_cpu_info_ and merged once. Both merged outputs must be
// byte-identical.
TEST_F(LibYUVPlanarTest, SplitUVPlane_Opt) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kPlaneStride = benchmark_width_;
  const int kUVStride = benchmark_width_ * 2;
  align_buffer_page_end(src_pixels, kPixels * 2);
  align_buffer_page_end(tmp_pixels_u, kPixels);
  align_buffer_page_end(tmp_pixels_v, kPixels);
  align_buffer_page_end(dst_pixels_opt, kPixels * 2);
  align_buffer_page_end(dst_pixels_c, kPixels * 2);

  MemRandomize(src_pixels, kPixels * 2);
  MemRandomize(tmp_pixels_u, kPixels);
  MemRandomize(tmp_pixels_v, kPixels);
  MemRandomize(dst_pixels_opt, kPixels * 2);
  MemRandomize(dst_pixels_c, kPixels * 2);

  // Reference round trip.
  MaskCpuFlags(disable_cpu_flags_);
  SplitUVPlane(src_pixels, kUVStride, tmp_pixels_u, kPlaneStride, tmp_pixels_v,
               kPlaneStride, benchmark_width_, benchmark_height_);
  MergeUVPlane(tmp_pixels_u, kPlaneStride, tmp_pixels_v, kPlaneStride,
               dst_pixels_c, kUVStride, benchmark_width_, benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  // Timed split, followed by a single merge to produce comparable output.
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    SplitUVPlane(src_pixels, kUVStride, tmp_pixels_u, kPlaneStride,
                 tmp_pixels_v, kPlaneStride, benchmark_width_,
                 benchmark_height_);
  }
  MergeUVPlane(tmp_pixels_u, kPlaneStride, tmp_pixels_v, kPlaneStride,
               dst_pixels_opt, kUVStride, benchmark_width_, benchmark_height_);

  for (int i = 0; i < kPixels * 2; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(tmp_pixels_u);
  free_aligned_buffer_page_end(tmp_pixels_v);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
2570 
// Benchmarks MergeRGBPlane. A split-then-merge round trip is computed under
// disable_cpu_flags_; the split is then redone under benchmark_cpu_info_ and
// the merge is repeated benchmark_iterations_ times. Both merged outputs must
// be byte-identical.
TEST_F(LibYUVPlanarTest, MergeRGBPlane_Opt) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kPlaneStride = benchmark_width_;
  const int kRGBStride = benchmark_width_ * 3;
  align_buffer_page_end(src_pixels, kPixels * 3);
  align_buffer_page_end(tmp_pixels_r, kPixels);
  align_buffer_page_end(tmp_pixels_g, kPixels);
  align_buffer_page_end(tmp_pixels_b, kPixels);
  align_buffer_page_end(dst_pixels_opt, kPixels * 3);
  align_buffer_page_end(dst_pixels_c, kPixels * 3);

  MemRandomize(src_pixels, kPixels * 3);
  MemRandomize(tmp_pixels_r, kPixels);
  MemRandomize(tmp_pixels_g, kPixels);
  MemRandomize(tmp_pixels_b, kPixels);
  MemRandomize(dst_pixels_opt, kPixels * 3);
  MemRandomize(dst_pixels_c, kPixels * 3);

  // Reference round trip.
  MaskCpuFlags(disable_cpu_flags_);
  SplitRGBPlane(src_pixels, kRGBStride, tmp_pixels_r, kPlaneStride,
                tmp_pixels_g, kPlaneStride, tmp_pixels_b, kPlaneStride,
                benchmark_width_, benchmark_height_);
  MergeRGBPlane(tmp_pixels_r, kPlaneStride, tmp_pixels_g, kPlaneStride,
                tmp_pixels_b, kPlaneStride, dst_pixels_c, kRGBStride,
                benchmark_width_, benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  // Regenerate the planes with the benchmark flags before timing the merge.
  SplitRGBPlane(src_pixels, kRGBStride, tmp_pixels_r, kPlaneStride,
                tmp_pixels_g, kPlaneStride, tmp_pixels_b, kPlaneStride,
                benchmark_width_, benchmark_height_);

  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    MergeRGBPlane(tmp_pixels_r, kPlaneStride, tmp_pixels_g, kPlaneStride,
                  tmp_pixels_b, kPlaneStride, dst_pixels_opt, kRGBStride,
                  benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < kPixels * 3; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(tmp_pixels_r);
  free_aligned_buffer_page_end(tmp_pixels_g);
  free_aligned_buffer_page_end(tmp_pixels_b);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
2618 
// Benchmarks SplitRGBPlane. A split-then-merge round trip is computed under
// disable_cpu_flags_; the split is then repeated benchmark_iterations_ times
// under benchmark_cpu_info_ and merged once. Both merged outputs must be
// byte-identical.
TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kPlaneStride = benchmark_width_;
  const int kRGBStride = benchmark_width_ * 3;
  align_buffer_page_end(src_pixels, kPixels * 3);
  align_buffer_page_end(tmp_pixels_r, kPixels);
  align_buffer_page_end(tmp_pixels_g, kPixels);
  align_buffer_page_end(tmp_pixels_b, kPixels);
  align_buffer_page_end(dst_pixels_opt, kPixels * 3);
  align_buffer_page_end(dst_pixels_c, kPixels * 3);

  MemRandomize(src_pixels, kPixels * 3);
  MemRandomize(tmp_pixels_r, kPixels);
  MemRandomize(tmp_pixels_g, kPixels);
  MemRandomize(tmp_pixels_b, kPixels);
  MemRandomize(dst_pixels_opt, kPixels * 3);
  MemRandomize(dst_pixels_c, kPixels * 3);

  // Reference round trip.
  MaskCpuFlags(disable_cpu_flags_);
  SplitRGBPlane(src_pixels, kRGBStride, tmp_pixels_r, kPlaneStride,
                tmp_pixels_g, kPlaneStride, tmp_pixels_b, kPlaneStride,
                benchmark_width_, benchmark_height_);
  MergeRGBPlane(tmp_pixels_r, kPlaneStride, tmp_pixels_g, kPlaneStride,
                tmp_pixels_b, kPlaneStride, dst_pixels_c, kRGBStride,
                benchmark_width_, benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  // Timed split, followed by a single merge to produce comparable output.
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    SplitRGBPlane(src_pixels, kRGBStride, tmp_pixels_r, kPlaneStride,
                  tmp_pixels_g, kPlaneStride, tmp_pixels_b, kPlaneStride,
                  benchmark_width_, benchmark_height_);
  }
  MergeRGBPlane(tmp_pixels_r, kPlaneStride, tmp_pixels_g, kPlaneStride,
                tmp_pixels_b, kPlaneStride, dst_pixels_opt, kRGBStride,
                benchmark_width_, benchmark_height_);

  for (int i = 0; i < kPixels * 3; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(tmp_pixels_r);
  free_aligned_buffer_page_end(tmp_pixels_g);
  free_aligned_buffer_page_end(tmp_pixels_b);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
2665 
2666 // TODO(fbarchard): improve test for platforms and cpu detect
2667 #ifdef HAS_MERGEUVROW_16_AVX2
// Compares MergeUVRow_16_AVX2 (when the CPU reports AVX2, otherwise the C
// row function again) against MergeUVRow_16_C over kPixels 16-bit samples,
// using a scale argument of 64.
TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kScale = 64;
  align_buffer_page_end(src_pixels_u, kPixels * 2);
  align_buffer_page_end(src_pixels_v, kPixels * 2);
  align_buffer_page_end(dst_pixels_uv_opt, kPixels * 2 * 2);
  align_buffer_page_end(dst_pixels_uv_c, kPixels * 2 * 2);

  MemRandomize(src_pixels_u, kPixels * 2);
  MemRandomize(src_pixels_v, kPixels * 2);
  // Distinct fill patterns so an unwritten output byte shows up as a mismatch.
  memset(dst_pixels_uv_opt, 0, kPixels * 2 * 2);
  memset(dst_pixels_uv_c, 1, kPixels * 2 * 2);

  // 16-bit views of the byte buffers.
  const uint16_t* src_u16 = reinterpret_cast<const uint16_t*>(src_pixels_u);
  const uint16_t* src_v16 = reinterpret_cast<const uint16_t*>(src_pixels_v);
  uint16_t* dst_uv16_c = reinterpret_cast<uint16_t*>(dst_pixels_uv_c);
  uint16_t* dst_uv16_opt = reinterpret_cast<uint16_t*>(dst_pixels_uv_opt);

  MergeUVRow_16_C(src_u16, src_v16, dst_uv16_c, kScale, kPixels);

  int has_avx2 = TestCpuFlag(kCpuHasAVX2);
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    if (has_avx2) {
      MergeUVRow_16_AVX2(src_u16, src_v16, dst_uv16_opt, kScale, kPixels);
    } else {
      MergeUVRow_16_C(src_u16, src_v16, dst_uv16_opt, kScale, kPixels);
    }
  }

  for (int i = 0; i < kPixels * 2 * 2; ++i) {
    EXPECT_EQ(dst_pixels_uv_opt[i], dst_pixels_uv_c[i]);
  }

  free_aligned_buffer_page_end(src_pixels_u);
  free_aligned_buffer_page_end(src_pixels_v);
  free_aligned_buffer_page_end(dst_pixels_uv_opt);
  free_aligned_buffer_page_end(dst_pixels_uv_c);
}
2708 #endif
2709 
2710 // TODO(fbarchard): Improve test for more platforms.
2711 #ifdef HAS_MULTIPLYROW_16_AVX2
// Compares MultiplyRow_16_AVX2 (when the CPU reports AVX2, otherwise the C
// row function again) against MultiplyRow_16_C over kPixels 16-bit samples,
// using a scale argument of 64.
TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kScale = 64;
  align_buffer_page_end(src_pixels_y, kPixels * 2);
  align_buffer_page_end(dst_pixels_y_opt, kPixels * 2);
  align_buffer_page_end(dst_pixels_y_c, kPixels * 2);

  MemRandomize(src_pixels_y, kPixels * 2);
  // Distinct fill patterns so an unwritten output byte shows up as a mismatch.
  memset(dst_pixels_y_opt, 0, kPixels * 2);
  memset(dst_pixels_y_c, 1, kPixels * 2);

  // 16-bit views of the byte buffers.
  const uint16_t* src_y16 = reinterpret_cast<const uint16_t*>(src_pixels_y);
  uint16_t* dst_y16_c = reinterpret_cast<uint16_t*>(dst_pixels_y_c);
  uint16_t* dst_y16_opt = reinterpret_cast<uint16_t*>(dst_pixels_y_opt);

  MultiplyRow_16_C(src_y16, dst_y16_c, kScale, kPixels);

  int has_avx2 = TestCpuFlag(kCpuHasAVX2);
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    if (has_avx2) {
      MultiplyRow_16_AVX2(src_y16, dst_y16_opt, kScale, kPixels);
    } else {
      MultiplyRow_16_C(src_y16, dst_y16_opt, kScale, kPixels);
    }
  }

  for (int i = 0; i < kPixels * 2; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(src_pixels_y);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
2746 #endif  // HAS_MULTIPLYROW_16_AVX2
2747 
// Runs Convert16To8Plane (scale argument 16384) once with disable_cpu_flags_
// masked in and benchmark_iterations_ times with benchmark_cpu_info_, and
// expects identical 8-bit output from both.
TEST_F(LibYUVPlanarTest, Convert16To8Plane) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kScale = 16384;
  align_buffer_page_end(src_pixels_y, kPixels * 2);
  align_buffer_page_end(dst_pixels_y_opt, kPixels);
  align_buffer_page_end(dst_pixels_y_c, kPixels);

  MemRandomize(src_pixels_y, kPixels * 2);
  // Distinct fill patterns so an unwritten output byte shows up as a mismatch.
  memset(dst_pixels_y_opt, 0, kPixels);
  memset(dst_pixels_y_c, 1, kPixels);

  const uint16_t* src_y16 = reinterpret_cast<const uint16_t*>(src_pixels_y);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  Convert16To8Plane(src_y16, benchmark_width_, dst_pixels_y_c,
                    benchmark_width_, kScale, benchmark_width_,
                    benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  // Benchmarked pass.
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    Convert16To8Plane(src_y16, benchmark_width_, dst_pixels_y_opt,
                      benchmark_width_, kScale, benchmark_width_,
                      benchmark_height_);
  }

  for (int i = 0; i < kPixels; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(src_pixels_y);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
2778 
2779 // TODO(fbarchard): Improve test for more platforms.
2780 #ifdef HAS_CONVERT16TO8ROW_AVX2
// Compares the Convert16To8Row SIMD variants (AVX2 if reported, else SSSE3 if
// reported, else the C function again) against Convert16To8Row_C on random
// input masked to 10 bits, using a scale argument of 16384.
TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) {
  // AVX2 does multiple of 32, so round count up
  const int kPixels = (benchmark_width_ * benchmark_height_ + 31) & ~31;
  const int kScale = 16384;
  align_buffer_page_end(src_pixels_y, kPixels * 2);
  align_buffer_page_end(dst_pixels_y_opt, kPixels);
  align_buffer_page_end(dst_pixels_y_c, kPixels);

  MemRandomize(src_pixels_y, kPixels * 2);
  // clamp source range to 10 bits.
  uint16_t* src_samples = reinterpret_cast<uint16_t*>(src_pixels_y);
  for (int n = 0; n < kPixels; ++n) {
    src_samples[n] &= 1023;
  }

  // Distinct fill patterns so an unwritten output byte shows up as a mismatch.
  memset(dst_pixels_y_opt, 0, kPixels);
  memset(dst_pixels_y_c, 1, kPixels);

  const uint16_t* src_y16 = reinterpret_cast<const uint16_t*>(src_pixels_y);

  Convert16To8Row_C(src_y16, dst_pixels_y_c, kScale, kPixels);

  int has_avx2 = TestCpuFlag(kCpuHasAVX2);
  int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    if (has_avx2) {
      Convert16To8Row_AVX2(src_y16, dst_pixels_y_opt, kScale, kPixels);
    } else if (has_ssse3) {
      Convert16To8Row_SSSE3(src_y16, dst_pixels_y_opt, kScale, kPixels);
    } else {
      Convert16To8Row_C(src_y16, dst_pixels_y_opt, kScale, kPixels);
    }
  }

  for (int i = 0; i < kPixels; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(src_pixels_y);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
2823 #endif  // HAS_CONVERT16TO8ROW_AVX2
2824 
// Runs Convert8To16Plane (scale argument 1024) once with disable_cpu_flags_
// masked in and benchmark_iterations_ times with benchmark_cpu_info_, and
// expects byte-identical 16-bit output from both.
TEST_F(LibYUVPlanarTest, Convert8To16Plane) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kScale = 1024;
  align_buffer_page_end(src_pixels_y, kPixels);
  align_buffer_page_end(dst_pixels_y_opt, kPixels * 2);
  align_buffer_page_end(dst_pixels_y_c, kPixels * 2);

  MemRandomize(src_pixels_y, kPixels);
  // Distinct fill patterns so an unwritten output byte shows up as a mismatch.
  memset(dst_pixels_y_opt, 0, kPixels * 2);
  memset(dst_pixels_y_c, 1, kPixels * 2);

  uint16_t* dst_y16_c = reinterpret_cast<uint16_t*>(dst_pixels_y_c);
  uint16_t* dst_y16_opt = reinterpret_cast<uint16_t*>(dst_pixels_y_opt);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  Convert8To16Plane(src_pixels_y, benchmark_width_, dst_y16_c,
                    benchmark_width_, kScale, benchmark_width_,
                    benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  // Benchmarked pass.
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    Convert8To16Plane(src_pixels_y, benchmark_width_, dst_y16_opt,
                      benchmark_width_, kScale, benchmark_width_,
                      benchmark_height_);
  }

  for (int i = 0; i < kPixels * 2; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(src_pixels_y);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
2857 
2858 // TODO(fbarchard): Improve test for more platforms.
2859 #ifdef HAS_CONVERT8TO16ROW_AVX2
// Compares the Convert8To16Row SIMD variants (AVX2 if reported, else SSE2 if
// reported, else the C function again) against Convert8To16Row_C, using a
// scale argument of 1024. The pixel count is rounded up to a multiple of 32.
TEST_F(LibYUVPlanarTest, Convert8To16Row_Opt) {
  const int kPixels = (benchmark_width_ * benchmark_height_ + 31) & ~31;
  const int kScale = 1024;
  align_buffer_page_end(src_pixels_y, kPixels);
  align_buffer_page_end(dst_pixels_y_opt, kPixels * 2);
  align_buffer_page_end(dst_pixels_y_c, kPixels * 2);

  MemRandomize(src_pixels_y, kPixels);
  // Distinct fill patterns so an unwritten output byte shows up as a mismatch.
  memset(dst_pixels_y_opt, 0, kPixels * 2);
  memset(dst_pixels_y_c, 1, kPixels * 2);

  uint16_t* dst_y16_c = reinterpret_cast<uint16_t*>(dst_pixels_y_c);
  uint16_t* dst_y16_opt = reinterpret_cast<uint16_t*>(dst_pixels_y_opt);

  Convert8To16Row_C(src_pixels_y, dst_y16_c, kScale, kPixels);

  int has_avx2 = TestCpuFlag(kCpuHasAVX2);
  int has_sse2 = TestCpuFlag(kCpuHasSSE2);
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    if (has_avx2) {
      Convert8To16Row_AVX2(src_pixels_y, dst_y16_opt, kScale, kPixels);
    } else if (has_sse2) {
      Convert8To16Row_SSE2(src_pixels_y, dst_y16_opt, kScale, kPixels);
    } else {
      Convert8To16Row_C(src_pixels_y, dst_y16_opt, kScale, kPixels);
    }
  }

  for (int i = 0; i < kPixels * 2; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(src_pixels_y);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
2899 #endif  // HAS_CONVERT8TO16ROW_AVX2
2900 
TestScaleMaxSamples(int benchmark_width,int benchmark_height,int benchmark_iterations,float scale,bool opt)2901 float TestScaleMaxSamples(int benchmark_width,
2902                           int benchmark_height,
2903                           int benchmark_iterations,
2904                           float scale,
2905                           bool opt) {
2906   int i, j;
2907   float max_c, max_opt = 0.f;
2908   // NEON does multiple of 8, so round count up
2909   const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
2910   align_buffer_page_end(orig_y, kPixels * 4 * 3 + 48);
2911   uint8_t* dst_c = orig_y + kPixels * 4 + 16;
2912   uint8_t* dst_opt = orig_y + kPixels * 4 * 2 + 32;
2913 
2914   // Randomize works but may contain some denormals affecting performance.
2915   // MemRandomize(orig_y, kPixels * 4);
2916   // large values are problematic.  audio is really -1 to 1.
2917   for (i = 0; i < kPixels; ++i) {
2918     (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
2919   }
2920   memset(dst_c, 0, kPixels * 4);
2921   memset(dst_opt, 1, kPixels * 4);
2922 
2923   max_c = ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y),
2924                             reinterpret_cast<float*>(dst_c), scale, kPixels);
2925 
2926   for (j = 0; j < benchmark_iterations; j++) {
2927     if (opt) {
2928 #ifdef HAS_SCALESUMSAMPLES_NEON
2929       max_opt = ScaleMaxSamples_NEON(reinterpret_cast<float*>(orig_y),
2930                                      reinterpret_cast<float*>(dst_opt), scale,
2931                                      kPixels);
2932 #else
2933       max_opt =
2934           ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y),
2935                             reinterpret_cast<float*>(dst_opt), scale, kPixels);
2936 #endif
2937     } else {
2938       max_opt =
2939           ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y),
2940                             reinterpret_cast<float*>(dst_opt), scale, kPixels);
2941     }
2942   }
2943 
2944   float max_diff = FAbs(max_opt - max_c);
2945   for (i = 0; i < kPixels; ++i) {
2946     float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
2947                           (reinterpret_cast<float*>(dst_opt)[i]));
2948     if (abs_diff > max_diff) {
2949       max_diff = abs_diff;
2950     }
2951   }
2952 
2953   free_aligned_buffer_page_end(orig_y);
2954   return max_diff;
2955 }
2956 
// ScaleMaxSamples with opt == false: the C path is compared against itself.
TEST_F(LibYUVPlanarTest, TestScaleMaxSamples_C) {
  const float kScale = 1.2f;
  const bool kUseOpt = false;
  float diff = TestScaleMaxSamples(benchmark_width_, benchmark_height_,
                                   benchmark_iterations_, kScale, kUseOpt);
  EXPECT_EQ(0, diff);
}
2962 
// ScaleMaxSamples with opt == true: the optimized path (where compiled in) is
// compared against the C path.
TEST_F(LibYUVPlanarTest, TestScaleMaxSamples_Opt) {
  const float kScale = 1.2f;
  const bool kUseOpt = true;
  float diff = TestScaleMaxSamples(benchmark_width_, benchmark_height_,
                                   benchmark_iterations_, kScale, kUseOpt);
  EXPECT_EQ(0, diff);
}
2968 
TestScaleSumSamples(int benchmark_width,int benchmark_height,int benchmark_iterations,float scale,bool opt)2969 float TestScaleSumSamples(int benchmark_width,
2970                           int benchmark_height,
2971                           int benchmark_iterations,
2972                           float scale,
2973                           bool opt) {
2974   int i, j;
2975   float sum_c, sum_opt = 0.f;
2976   // NEON does multiple of 8, so round count up
2977   const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
2978   align_buffer_page_end(orig_y, kPixels * 4 * 3);
2979   uint8_t* dst_c = orig_y + kPixels * 4;
2980   uint8_t* dst_opt = orig_y + kPixels * 4 * 2;
2981 
2982   // Randomize works but may contain some denormals affecting performance.
2983   // MemRandomize(orig_y, kPixels * 4);
2984   // large values are problematic.  audio is really -1 to 1.
2985   for (i = 0; i < kPixels; ++i) {
2986     (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
2987   }
2988   memset(dst_c, 0, kPixels * 4);
2989   memset(dst_opt, 1, kPixels * 4);
2990 
2991   sum_c = ScaleSumSamples_C(reinterpret_cast<float*>(orig_y),
2992                             reinterpret_cast<float*>(dst_c), scale, kPixels);
2993 
2994   for (j = 0; j < benchmark_iterations; j++) {
2995     if (opt) {
2996 #ifdef HAS_SCALESUMSAMPLES_NEON
2997       sum_opt = ScaleSumSamples_NEON(reinterpret_cast<float*>(orig_y),
2998                                      reinterpret_cast<float*>(dst_opt), scale,
2999                                      kPixels);
3000 #else
3001       sum_opt =
3002           ScaleSumSamples_C(reinterpret_cast<float*>(orig_y),
3003                             reinterpret_cast<float*>(dst_opt), scale, kPixels);
3004 #endif
3005     } else {
3006       sum_opt =
3007           ScaleSumSamples_C(reinterpret_cast<float*>(orig_y),
3008                             reinterpret_cast<float*>(dst_opt), scale, kPixels);
3009     }
3010   }
3011 
3012   float mse_opt = sum_opt / kPixels * 4;
3013   float mse_c = sum_c / kPixels * 4;
3014   float mse_error = FAbs(mse_opt - mse_c) / mse_c;
3015 
3016   // If the sum of a float is more than 4 million, small adds are round down on
3017   // float and produce different results with vectorized sum vs scalar sum.
3018   // Ignore the difference if the sum is large.
3019   float max_diff = 0.f;
3020   if (mse_error > 0.0001 && sum_c < 4000000) {  // allow .01% difference of mse
3021     max_diff = mse_error;
3022   }
3023 
3024   for (i = 0; i < kPixels; ++i) {
3025     float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
3026                           (reinterpret_cast<float*>(dst_opt)[i]));
3027     if (abs_diff > max_diff) {
3028       max_diff = abs_diff;
3029     }
3030   }
3031 
3032   free_aligned_buffer_page_end(orig_y);
3033   return max_diff;
3034 }
3035 
// ScaleSumSamples with opt = false and a 1.2 scale: the helper's reported
// maximum difference must be zero.
TEST_F(LibYUVPlanarTest, TestScaleSumSamples_C) {
  const float diff = TestScaleSumSamples(
      benchmark_width_, benchmark_height_, benchmark_iterations_, 1.2f, false);
  EXPECT_EQ(0, diff);
}
3041 
// ScaleSumSamples with opt = true and a 1.2 scale: the helper's reported
// maximum difference must be zero.
TEST_F(LibYUVPlanarTest, TestScaleSumSamples_Opt) {
  const float diff = TestScaleSumSamples(
      benchmark_width_, benchmark_height_, benchmark_iterations_, 1.2f, true);
  EXPECT_EQ(0, diff);
}
3047 
TestScaleSamples(int benchmark_width,int benchmark_height,int benchmark_iterations,float scale,bool opt)3048 float TestScaleSamples(int benchmark_width,
3049                        int benchmark_height,
3050                        int benchmark_iterations,
3051                        float scale,
3052                        bool opt) {
3053   int i, j;
3054   // NEON does multiple of 8, so round count up
3055   const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
3056   align_buffer_page_end(orig_y, kPixels * 4 * 3);
3057   uint8_t* dst_c = orig_y + kPixels * 4;
3058   uint8_t* dst_opt = orig_y + kPixels * 4 * 2;
3059 
3060   // Randomize works but may contain some denormals affecting performance.
3061   // MemRandomize(orig_y, kPixels * 4);
3062   // large values are problematic.  audio is really -1 to 1.
3063   for (i = 0; i < kPixels; ++i) {
3064     (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
3065   }
3066   memset(dst_c, 0, kPixels * 4);
3067   memset(dst_opt, 1, kPixels * 4);
3068 
3069   ScaleSamples_C(reinterpret_cast<float*>(orig_y),
3070                  reinterpret_cast<float*>(dst_c), scale, kPixels);
3071 
3072   for (j = 0; j < benchmark_iterations; j++) {
3073     if (opt) {
3074 #ifdef HAS_SCALESUMSAMPLES_NEON
3075       ScaleSamples_NEON(reinterpret_cast<float*>(orig_y),
3076                         reinterpret_cast<float*>(dst_opt), scale, kPixels);
3077 #else
3078       ScaleSamples_C(reinterpret_cast<float*>(orig_y),
3079                      reinterpret_cast<float*>(dst_opt), scale, kPixels);
3080 #endif
3081     } else {
3082       ScaleSamples_C(reinterpret_cast<float*>(orig_y),
3083                      reinterpret_cast<float*>(dst_opt), scale, kPixels);
3084     }
3085   }
3086 
3087   float max_diff = 0.f;
3088   for (i = 0; i < kPixels; ++i) {
3089     float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
3090                           (reinterpret_cast<float*>(dst_opt)[i]));
3091     if (abs_diff > max_diff) {
3092       max_diff = abs_diff;
3093     }
3094   }
3095 
3096   free_aligned_buffer_page_end(orig_y);
3097   return max_diff;
3098 }
3099 
// ScaleSamples with opt = false and a 1.2 scale: the helper's reported
// maximum difference must be zero.
TEST_F(LibYUVPlanarTest, TestScaleSamples_C) {
  const float diff = TestScaleSamples(
      benchmark_width_, benchmark_height_, benchmark_iterations_, 1.2f, false);
  EXPECT_EQ(0, diff);
}
3105 
// ScaleSamples with opt = true and a 1.2 scale: the helper's reported
// maximum difference must be zero.
TEST_F(LibYUVPlanarTest, TestScaleSamples_Opt) {
  const float diff = TestScaleSamples(
      benchmark_width_, benchmark_height_, benchmark_iterations_, 1.2f, true);
  EXPECT_EQ(0, diff);
}
3111 
TestCopySamples(int benchmark_width,int benchmark_height,int benchmark_iterations,bool opt)3112 float TestCopySamples(int benchmark_width,
3113                       int benchmark_height,
3114                       int benchmark_iterations,
3115                       bool opt) {
3116   int i, j;
3117   // NEON does multiple of 16 floats, so round count up
3118   const int kPixels = (benchmark_width * benchmark_height + 15) & ~15;
3119   align_buffer_page_end(orig_y, kPixels * 4 * 3);
3120   uint8_t* dst_c = orig_y + kPixels * 4;
3121   uint8_t* dst_opt = orig_y + kPixels * 4 * 2;
3122 
3123   // Randomize works but may contain some denormals affecting performance.
3124   // MemRandomize(orig_y, kPixels * 4);
3125   // large values are problematic.  audio is really -1 to 1.
3126   for (i = 0; i < kPixels; ++i) {
3127     (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
3128   }
3129   memset(dst_c, 0, kPixels * 4);
3130   memset(dst_opt, 1, kPixels * 4);
3131 
3132   memcpy(reinterpret_cast<void*>(dst_c), reinterpret_cast<void*>(orig_y),
3133          kPixels * 4);
3134 
3135   for (j = 0; j < benchmark_iterations; j++) {
3136     if (opt) {
3137 #ifdef HAS_COPYROW_NEON
3138       CopyRow_NEON(orig_y, dst_opt, kPixels * 4);
3139 #else
3140       CopyRow_C(orig_y, dst_opt, kPixels * 4);
3141 #endif
3142     } else {
3143       CopyRow_C(orig_y, dst_opt, kPixels * 4);
3144     }
3145   }
3146 
3147   float max_diff = 0.f;
3148   for (i = 0; i < kPixels; ++i) {
3149     float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
3150                           (reinterpret_cast<float*>(dst_opt)[i]));
3151     if (abs_diff > max_diff) {
3152       max_diff = abs_diff;
3153     }
3154   }
3155 
3156   free_aligned_buffer_page_end(orig_y);
3157   return max_diff;
3158 }
3159 
// CopySamples with opt = false: the copied plane must match the memcpy
// reference exactly.
TEST_F(LibYUVPlanarTest, TestCopySamples_C) {
  const float diff = TestCopySamples(benchmark_width_, benchmark_height_,
                                     benchmark_iterations_, false);
  EXPECT_EQ(0, diff);
}
3165 
// CopySamples with opt = true: the copied plane must match the memcpy
// reference exactly.
TEST_F(LibYUVPlanarTest, TestCopySamples_Opt) {
  const float diff = TestCopySamples(benchmark_width_, benchmark_height_,
                                     benchmark_iterations_, true);
  EXPECT_EQ(0, diff);
}
3171 
3172 extern "C" void GaussRow_NEON(const uint32_t* src, uint16_t* dst, int width);
3173 extern "C" void GaussRow_C(const uint32_t* src, uint16_t* dst, int width);
3174 
// Compares GaussRow_NEON (when compiled in and reported by the CPU flags)
// against GaussRow_C on a 640-wide ramp, and spot-checks the first and last
// reference outputs against known values.
TEST_F(LibYUVPlanarTest, TestGaussRow_Opt) {
  // Source holds 4 entries more than the output width.
  SIMD_ALIGNED(uint32_t src_ramp[640 + 4]);
  SIMD_ALIGNED(uint16_t dst_pixels_c[640]);
  SIMD_ALIGNED(uint16_t dst_pixels_opt[640]);

  // Different fill bytes per buffer so an unwritten output is detected.
  memset(src_ramp, 0, sizeof(src_ramp));
  memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
  memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));

  // Ramp input: entry k holds k * 256.
  for (int k = 0; k < 640 + 4; ++k) {
    src_ramp[k] = k * 256;
  }

  // Reference result.
  GaussRow_C(src_ramp, dst_pixels_c, 640);

  for (int iter = 0; iter < benchmark_pixels_div1280_ * 2; ++iter) {
#if !defined(LIBYUV_DISABLE_NEON) && \
    (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
    if (TestCpuFlag(kCpuHasNEON)) {
      GaussRow_NEON(src_ramp, dst_pixels_opt, 640);
    } else {
      GaussRow_C(src_ramp, dst_pixels_opt, 640);
    }
#else
    GaussRow_C(src_ramp, dst_pixels_opt, 640);
#endif
  }

  for (int i = 0; i < 640; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  // Expected first output: ramp indices 0..4 weighted 1, 4, 6, 4, 1.
  EXPECT_EQ(dst_pixels_c[0],
            static_cast<uint16_t>(0 * 1 + 1 * 4 + 2 * 6 + 3 * 4 + 4 * 1));
  EXPECT_EQ(dst_pixels_c[639], static_cast<uint16_t>(10256));
}
3210 
3211 extern "C" void GaussCol_NEON(const uint16_t* src0,
3212                               const uint16_t* src1,
3213                               const uint16_t* src2,
3214                               const uint16_t* src3,
3215                               const uint16_t* src4,
3216                               uint32_t* dst,
3217                               int width);
3218 
3219 extern "C" void GaussCol_C(const uint16_t* src0,
3220                            const uint16_t* src1,
3221                            const uint16_t* src2,
3222                            const uint16_t* src3,
3223                            const uint16_t* src4,
3224                            uint32_t* dst,
3225                            int width);
3226 
// Compares GaussCol_NEON (when compiled in and reported by the CPU flags)
// against GaussCol_C on five consecutive 640-wide rows of a ramp, and
// spot-checks the first and last reference outputs against known values.
TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) {
  SIMD_ALIGNED(uint16_t src_ramp[640 * 5]);
  SIMD_ALIGNED(uint32_t dst_pixels_c[640]);
  SIMD_ALIGNED(uint32_t dst_pixels_opt[640]);

  // Different fill bytes per buffer so an unwritten output is detected.
  memset(src_ramp, 0, sizeof(src_ramp));
  memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
  memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));

  // Ramp input: entry k holds k, so row r starts at value 640 * r.
  for (int k = 0; k < 640 * 5; ++k) {
    src_ramp[k] = k;
  }

  // The five input rows, each 640 entries apart.
  const uint16_t* const row0 = &src_ramp[0];
  const uint16_t* const row1 = &src_ramp[640];
  const uint16_t* const row2 = &src_ramp[640 * 2];
  const uint16_t* const row3 = &src_ramp[640 * 3];
  const uint16_t* const row4 = &src_ramp[640 * 4];

  // Reference result.
  GaussCol_C(row0, row1, row2, row3, row4, dst_pixels_c, 640);

  for (int iter = 0; iter < benchmark_pixels_div1280_ * 2; ++iter) {
#if !defined(LIBYUV_DISABLE_NEON) && \
    (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
    if (TestCpuFlag(kCpuHasNEON)) {
      GaussCol_NEON(row0, row1, row2, row3, row4, dst_pixels_opt, 640);
    } else {
      GaussCol_C(row0, row1, row2, row3, row4, dst_pixels_opt, 640);
    }
#else
    GaussCol_C(row0, row1, row2, row3, row4, dst_pixels_opt, 640);
#endif
  }

  for (int i = 0; i < 640; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  // Expected first output: first entry of each row weighted 1, 4, 6, 4, 1.
  EXPECT_EQ(dst_pixels_c[0],
            static_cast<uint32_t>(0 * 1 + 640 * 4 + 640 * 2 * 6 + 640 * 3 * 4 +
                                  640 * 4 * 1));
  EXPECT_EQ(dst_pixels_c[639], static_cast<uint32_t>(30704));
}
3271 
TestFloatDivToByte(int benchmark_width,int benchmark_height,int benchmark_iterations,float scale,bool opt)3272 float TestFloatDivToByte(int benchmark_width,
3273                          int benchmark_height,
3274                          int benchmark_iterations,
3275                          float scale,
3276                          bool opt) {
3277   int i, j;
3278   // NEON does multiple of 8, so round count up
3279   const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
3280   align_buffer_page_end(src_weights, kPixels * 4);
3281   align_buffer_page_end(src_values, kPixels * 4);
3282   align_buffer_page_end(dst_out_c, kPixels);
3283   align_buffer_page_end(dst_out_opt, kPixels);
3284   align_buffer_page_end(dst_mask_c, kPixels);
3285   align_buffer_page_end(dst_mask_opt, kPixels);
3286 
3287   // Randomize works but may contain some denormals affecting performance.
3288   // MemRandomize(orig_y, kPixels * 4);
3289   // large values are problematic.  audio is really -1 to 1.
3290   for (i = 0; i < kPixels; ++i) {
3291     (reinterpret_cast<float*>(src_weights))[i] = scale;
3292     (reinterpret_cast<float*>(src_values))[i] =
3293         sinf(static_cast<float>(i) * 0.1f);
3294   }
3295   memset(dst_out_c, 0, kPixels);
3296   memset(dst_out_opt, 1, kPixels);
3297   memset(dst_mask_c, 2, kPixels);
3298   memset(dst_mask_opt, 3, kPixels);
3299 
3300   FloatDivToByteRow_C(reinterpret_cast<float*>(src_weights),
3301                       reinterpret_cast<float*>(src_values), dst_out_c,
3302                       dst_mask_c, kPixels);
3303 
3304   for (j = 0; j < benchmark_iterations; j++) {
3305     if (opt) {
3306 #ifdef HAS_FLOATDIVTOBYTEROW_NEON
3307       FloatDivToByteRow_NEON(reinterpret_cast<float*>(src_weights),
3308                              reinterpret_cast<float*>(src_values), dst_out_opt,
3309                              dst_mask_opt, kPixels);
3310 #else
3311       FloatDivToByteRow_C(reinterpret_cast<float*>(src_weights),
3312                           reinterpret_cast<float*>(src_values), dst_out_opt,
3313                           dst_mask_opt, kPixels);
3314 #endif
3315     } else {
3316       FloatDivToByteRow_C(reinterpret_cast<float*>(src_weights),
3317                           reinterpret_cast<float*>(src_values), dst_out_opt,
3318                           dst_mask_opt, kPixels);
3319     }
3320   }
3321 
3322   uint8_t max_diff = 0;
3323   for (i = 0; i < kPixels; ++i) {
3324     uint8_t abs_diff = abs(dst_out_c[i] - dst_out_opt[i]) +
3325                        abs(dst_mask_c[i] - dst_mask_opt[i]);
3326     if (abs_diff > max_diff) {
3327       max_diff = abs_diff;
3328     }
3329   }
3330 
3331   free_aligned_buffer_page_end(src_weights);
3332   free_aligned_buffer_page_end(src_values);
3333   free_aligned_buffer_page_end(dst_out_c);
3334   free_aligned_buffer_page_end(dst_out_opt);
3335   free_aligned_buffer_page_end(dst_mask_c);
3336   free_aligned_buffer_page_end(dst_mask_opt);
3337 
3338   return max_diff;
3339 }
3340 
// FloatDivToByte with opt = false and every weight set to 1.2: the helper's
// reported maximum difference must be zero.
TEST_F(LibYUVPlanarTest, TestFloatDivToByte_C) {
  const float diff = TestFloatDivToByte(
      benchmark_width_, benchmark_height_, benchmark_iterations_, 1.2f, false);
  EXPECT_EQ(0, diff);
}
3346 
// FloatDivToByte with opt = true and every weight set to 1.2: the helper's
// reported maximum difference must be zero.
TEST_F(LibYUVPlanarTest, TestFloatDivToByte_Opt) {
  const float diff = TestFloatDivToByte(
      benchmark_width_, benchmark_height_, benchmark_iterations_, 1.2f, true);
  EXPECT_EQ(0, diff);
}
3352 
// Runs UVToVURow_C over random two-byte pixels and verifies that each output
// pair holds the corresponding input pair with its two bytes swapped.
TEST_F(LibYUVPlanarTest, UVToVURow) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kBytes = kPixels * 2;  // two bytes per pixel
  align_buffer_page_end(src_pixels_vu, kBytes);
  align_buffer_page_end(dst_pixels_uv, kBytes);

  // Random source; destination pre-filled so untouched bytes are detectable.
  MemRandomize(src_pixels_vu, kBytes);
  memset(dst_pixels_uv, 1, kBytes);

  UVToVURow_C(src_pixels_vu, dst_pixels_uv, kPixels);

  int i = 0;
  while (i < kPixels) {
    EXPECT_EQ(dst_pixels_uv[i * 2 + 0], src_pixels_vu[i * 2 + 1]);
    EXPECT_EQ(dst_pixels_uv[i * 2 + 1], src_pixels_vu[i * 2 + 0]);
    ++i;
  }

  free_aligned_buffer_page_end(src_pixels_vu);
  free_aligned_buffer_page_end(dst_pixels_uv);
}
3371 
3372 }  // namespace libyuv
3373