• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2016 Ronald S. Bultje <rsbultje@gmail.com>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <string.h>
22 #include "checkasm.h"
23 #include "libavfilter/colorspacedsp.h"
24 #include "libavutil/common.h"
25 #include "libavutil/internal.h"
26 #include "libavutil/intreadwrite.h"
27 #include "libavutil/mem_internal.h"
28 
/* Dimensions of the test planes; the checks below pass them as w and h. */
#define W 64
#define H 64

/*
 * Fill the three planes of src[] with random data, one 32-bit word at a
 * time, each word masked with bpp_mask[idepth].
 *
 * Plane 0 receives W * H bytes when idepth is 0 and twice that otherwise;
 * planes 1 and 2 receive that byte count shifted right by ss_w + ss_h.
 * The macro reads idepth, ss_w, ss_h and src from the scope it expands in.
 */
#define randomize_buffers()                                     \
    do {                                                        \
        const unsigned sample_mask = bpp_mask[idepth];          \
        const int bytes_per_sample = idepth ? 2 : 1;            \
        const int luma_bytes       = W * H * bytes_per_sample;  \
        int plane;                                              \
        for (plane = 0; plane < 3; plane++) {                   \
            const int shift       = plane ? ss_w + ss_h : 0;    \
            const int plane_bytes = luma_bytes >> shift;        \
            int pos = 0;                                        \
            while (pos < plane_bytes) {                         \
                unsigned word = rnd() & sample_mask;            \
                AV_WN32A(&src[plane][pos], word);               \
                pos += 4;                                       \
            }                                                   \
        }                                                       \
    } while (0)
47 
/*
 * Chroma layout names used to build the tested function names; indexed by
 * the fmt loop variable of the checks below. Both the strings and the
 * pointer table are read-only.
 */
static const char *const format_string[] = {
    "444", "422", "420"
};

/*
 * Masks applied by randomize_buffers() to each random 32-bit word, indexed
 * by idepth: entry 0 keeps every bit, entry 1 keeps the low 10 bits of each
 * 16-bit half, entry 2 keeps the low 12 bits of each 16-bit half.
 */
static const unsigned bpp_mask[] = { 0xffffffff, 0x03ff03ff, 0x0fff0fff };
53 
check_yuv2yuv(void)54 static void check_yuv2yuv(void)
55 {
56     declare_func(void, uint8_t *dst[3], ptrdiff_t dst_stride[3],
57                  uint8_t *src[3], ptrdiff_t src_stride[3],
58                  int w, int h, const int16_t coeff[3][3][8],
59                  const int16_t off[2][8]);
60     ColorSpaceDSPContext dsp;
61     int idepth, odepth, fmt, n;
62     LOCAL_ALIGNED_32(uint8_t, src_y, [W * H * 2]);
63     LOCAL_ALIGNED_32(uint8_t, src_u, [W * H * 2]);
64     LOCAL_ALIGNED_32(uint8_t, src_v, [W * H * 2]);
65     uint8_t *src[3] = { src_y, src_u, src_v };
66     LOCAL_ALIGNED_32(uint8_t, dst0_y, [W * H * 2]);
67     LOCAL_ALIGNED_32(uint8_t, dst0_u, [W * H * 2]);
68     LOCAL_ALIGNED_32(uint8_t, dst0_v, [W * H * 2]);
69     LOCAL_ALIGNED_32(uint8_t, dst1_y, [W * H * 2]);
70     LOCAL_ALIGNED_32(uint8_t, dst1_u, [W * H * 2]);
71     LOCAL_ALIGNED_32(uint8_t, dst1_v, [W * H * 2]);
72     uint8_t *dst0[3] = { dst0_y, dst0_u, dst0_v }, *dst1[3] = { dst1_y, dst1_u, dst1_v };
73     LOCAL_ALIGNED_32(int16_t, offset_buf, [16]);
74     LOCAL_ALIGNED_32(int16_t, coeff_buf, [3 * 3 * 8]);
75     int16_t (*offset)[8] = (int16_t(*)[8]) offset_buf;
76     int16_t (*coeff)[3][8] = (int16_t(*)[3][8]) coeff_buf;
77 
78     ff_colorspacedsp_init(&dsp);
79     for (n = 0; n < 8; n++) {
80         offset[0][n] = offset[1][n] = 16;
81 
82         coeff[0][0][n] = (1 << 14) + (1 << 7) + 1;
83         coeff[0][1][n] = (1 << 7) - 1;
84         coeff[0][2][n] = -(1 << 8);
85         coeff[1][0][n] = coeff[2][0][n] = 0;
86         coeff[1][1][n] = (1 << 14) + (1 << 7);
87         coeff[1][2][n] = -(1 << 7);
88         coeff[2][2][n] = (1 << 14) - (1 << 6);
89         coeff[2][1][n] = 1 << 6;
90     }
91     for (idepth = 0; idepth < 3; idepth++) {
92         for (odepth = 0; odepth < 3; odepth++) {
93             for (fmt = 0; fmt < 3; fmt++) {
94                 if (check_func(dsp.yuv2yuv[idepth][odepth][fmt],
95                                "ff_colorspacedsp_yuv2yuv_%sp%dto%d",
96                                format_string[fmt],
97                                idepth * 2 + 8, odepth * 2 + 8)) {
98                     int ss_w = !!fmt, ss_h = fmt == 2;
99                     int y_src_stride = W << !!idepth, y_dst_stride = W << !!odepth;
100                     int uv_src_stride = y_src_stride >> ss_w, uv_dst_stride = y_dst_stride >> ss_w;
101 
102                     randomize_buffers();
103                     call_ref(dst0, (ptrdiff_t[3]) { y_dst_stride, uv_dst_stride, uv_dst_stride },
104                              src, (ptrdiff_t[3]) { y_src_stride, uv_src_stride, uv_src_stride },
105                              W, H, coeff, offset);
106                     call_new(dst1, (ptrdiff_t[3]) { y_dst_stride, uv_dst_stride, uv_dst_stride },
107                              src, (ptrdiff_t[3]) { y_src_stride, uv_src_stride, uv_src_stride },
108                              W, H, coeff, offset);
109                     if (memcmp(dst0[0], dst1[0], y_dst_stride * H) ||
110                         memcmp(dst0[1], dst1[1], uv_dst_stride * H >> ss_h) ||
111                         memcmp(dst0[2], dst1[2], uv_dst_stride * H >> ss_h)) {
112                         fail();
113                     }
114                 }
115             }
116         }
117     }
118 
119     report("yuv2yuv");
120 }
121 
check_yuv2rgb(void)122 static void check_yuv2rgb(void)
123 {
124     declare_func(void, int16_t *dst[3], ptrdiff_t dst_stride,
125                  uint8_t *src[3], ptrdiff_t src_stride[3],
126                  int w, int h, const int16_t coeff[3][3][8],
127                  const int16_t off[8]);
128     ColorSpaceDSPContext dsp;
129     int idepth, fmt, n;
130     LOCAL_ALIGNED_32(uint8_t, src_y, [W * H * 2]);
131     LOCAL_ALIGNED_32(uint8_t, src_u, [W * H * 2]);
132     LOCAL_ALIGNED_32(uint8_t, src_v, [W * H * 2]);
133     uint8_t *src[3] = { src_y, src_u, src_v };
134     LOCAL_ALIGNED_32(int16_t, dst0_y, [W * H]);
135     LOCAL_ALIGNED_32(int16_t, dst0_u, [W * H]);
136     LOCAL_ALIGNED_32(int16_t, dst0_v, [W * H]);
137     LOCAL_ALIGNED_32(int16_t, dst1_y, [W * H]);
138     LOCAL_ALIGNED_32(int16_t, dst1_u, [W * H]);
139     LOCAL_ALIGNED_32(int16_t, dst1_v, [W * H]);
140     int16_t *dst0[3] = { dst0_y, dst0_u, dst0_v }, *dst1[3] = { dst1_y, dst1_u, dst1_v };
141     LOCAL_ALIGNED_32(int16_t, offset, [8]);
142     LOCAL_ALIGNED_32(int16_t, coeff_buf, [3 * 3 * 8]);
143     int16_t (*coeff)[3][8] = (int16_t(*)[3][8]) coeff_buf;
144 
145     ff_colorspacedsp_init(&dsp);
146     for (n = 0; n < 8; n++) {
147         offset[n] = 16;
148 
149         coeff[0][0][n] = coeff[1][0][n] = coeff[2][0][n] = (1 << 14) | 1;
150         coeff[0][1][n] = coeff[2][2][n] = 0;
151         coeff[0][2][n] = 1 << 13;
152         coeff[1][1][n] = -(1 << 12);
153         coeff[1][2][n] = 1 << 12;
154         coeff[2][1][n] = 1 << 11;
155     }
156     for (idepth = 0; idepth < 3; idepth++) {
157         for (fmt = 0; fmt < 3; fmt++) {
158             if (check_func(dsp.yuv2rgb[idepth][fmt],
159                            "ff_colorspacedsp_yuv2rgb_%sp%d",
160                            format_string[fmt], idepth * 2 + 8)) {
161                 int ss_w = !!fmt, ss_h = fmt == 2;
162                 int y_src_stride = W << !!idepth;
163                 int uv_src_stride = y_src_stride >> ss_w;
164 
165                 randomize_buffers();
166                 call_ref(dst0, W, src,
167                          (ptrdiff_t[3]) { y_src_stride, uv_src_stride, uv_src_stride },
168                          W, H, coeff, offset);
169                 call_new(dst1, W, src,
170                          (ptrdiff_t[3]) { y_src_stride, uv_src_stride, uv_src_stride },
171                          W, H, coeff, offset);
172                 if (memcmp(dst0[0], dst1[0], W * H * sizeof(int16_t)) ||
173                     memcmp(dst0[1], dst1[1], W * H * sizeof(int16_t)) ||
174                     memcmp(dst0[2], dst1[2], W * H * sizeof(int16_t))) {
175                     fail();
176                 }
177             }
178         }
179     }
180 
181     report("yuv2rgb");
182 }
183 
#undef randomize_buffers
/*
 * Fill the first W * H elements of each of the three src[] planes with
 * random values. Every value is a 15-bit random number minus
 * (32768 - 28672) >> 1, i.e. it lies in [-2048, 30719].
 * The macro reads src from the scope it expands in.
 */
#define randomize_buffers()                                         \
    do {                                                            \
        int plane, idx;                                             \
        for (plane = 0; plane < 3; plane++) {                       \
            for (idx = 0; idx < W * H; idx++) {                     \
                int sample = rnd() & 0x7fff;                        \
                src[plane][idx] = sample - ((32768 - 28672) >> 1);  \
            }                                                       \
        }                                                           \
    } while (0)
198 
check_rgb2yuv(void)199 static void check_rgb2yuv(void)
200 {
201     declare_func(void, uint8_t *dst[3], ptrdiff_t dst_stride[3],
202                  int16_t *src[3], ptrdiff_t src_stride,
203                  int w, int h, const int16_t coeff[3][3][8],
204                  const int16_t off[8]);
205     ColorSpaceDSPContext dsp;
206     int odepth, fmt, n;
207     LOCAL_ALIGNED_32(int16_t, src_y, [W * H * 2]);
208     LOCAL_ALIGNED_32(int16_t, src_u, [W * H * 2]);
209     LOCAL_ALIGNED_32(int16_t, src_v, [W * H * 2]);
210     int16_t *src[3] = { src_y, src_u, src_v };
211     LOCAL_ALIGNED_32(uint8_t, dst0_y, [W * H * 2]);
212     LOCAL_ALIGNED_32(uint8_t, dst0_u, [W * H * 2]);
213     LOCAL_ALIGNED_32(uint8_t, dst0_v, [W * H * 2]);
214     LOCAL_ALIGNED_32(uint8_t, dst1_y, [W * H * 2]);
215     LOCAL_ALIGNED_32(uint8_t, dst1_u, [W * H * 2]);
216     LOCAL_ALIGNED_32(uint8_t, dst1_v, [W * H * 2]);
217     uint8_t *dst0[3] = { dst0_y, dst0_u, dst0_v }, *dst1[3] = { dst1_y, dst1_u, dst1_v };
218     LOCAL_ALIGNED_32(int16_t, offset, [8]);
219     LOCAL_ALIGNED_32(int16_t, coeff_buf, [3 * 3 * 8]);
220     int16_t (*coeff)[3][8] = (int16_t(*)[3][8]) coeff_buf;
221 
222     ff_colorspacedsp_init(&dsp);
223     for (n = 0; n < 8; n++) {
224         offset[n] = 16;
225 
226         // these somewhat resemble bt601/smpte170m coefficients
227         coeff[0][0][n] = lrint(0.3 * (1 << 14));
228         coeff[0][1][n] = lrint(0.6 * (1 << 14));
229         coeff[0][2][n] = lrint(0.1 * (1 << 14));
230         coeff[1][0][n] = lrint(-0.15 * (1 << 14));
231         coeff[1][1][n] = lrint(-0.35 * (1 << 14));
232         coeff[1][2][n] = lrint(0.5 * (1 << 14));
233         coeff[2][0][n] = lrint(0.5 * (1 << 14));
234         coeff[2][1][n] = lrint(-0.42 * (1 << 14));
235         coeff[2][2][n] = lrint(-0.08 * (1 << 14));
236     }
237     for (odepth = 0; odepth < 3; odepth++) {
238         for (fmt = 0; fmt < 3; fmt++) {
239             if (check_func(dsp.rgb2yuv[odepth][fmt],
240                            "ff_colorspacedsp_rgb2yuv_%sp%d",
241                            format_string[fmt], odepth * 2 + 8)) {
242                 int ss_w = !!fmt, ss_h = fmt == 2;
243                 int y_dst_stride = W << !!odepth;
244                 int uv_dst_stride = y_dst_stride >> ss_w;
245 
246                 randomize_buffers();
247                 call_ref(dst0, (ptrdiff_t[3]) { y_dst_stride, uv_dst_stride, uv_dst_stride },
248                          src, W, W, H, coeff, offset);
249                 call_new(dst1, (ptrdiff_t[3]) { y_dst_stride, uv_dst_stride, uv_dst_stride },
250                          src, W, W, H, coeff, offset);
251                 if (memcmp(dst0[0], dst1[0], H * y_dst_stride) ||
252                     memcmp(dst0[1], dst1[1], H * uv_dst_stride >> ss_h) ||
253                     memcmp(dst0[2], dst1[2], H * uv_dst_stride >> ss_h)) {
254                     fail();
255                 }
256             }
257         }
258     }
259 
260     report("rgb2yuv");
261 }
262 
check_multiply3x3(void)263 static void check_multiply3x3(void)
264 {
265     declare_func(void, int16_t *data[3], ptrdiff_t stride,
266                  int w, int h, const int16_t coeff[3][3][8]);
267     ColorSpaceDSPContext dsp;
268     LOCAL_ALIGNED_32(int16_t, dst0_y, [W * H]);
269     LOCAL_ALIGNED_32(int16_t, dst0_u, [W * H]);
270     LOCAL_ALIGNED_32(int16_t, dst0_v, [W * H]);
271     LOCAL_ALIGNED_32(int16_t, dst1_y, [W * H]);
272     LOCAL_ALIGNED_32(int16_t, dst1_u, [W * H]);
273     LOCAL_ALIGNED_32(int16_t, dst1_v, [W * H]);
274     int16_t *dst0[3] = { dst0_y, dst0_u, dst0_v }, *dst1[3] = { dst1_y, dst1_u, dst1_v };
275     int16_t **src = dst0;
276     LOCAL_ALIGNED_32(int16_t, coeff_buf, [3 * 3 * 8]);
277     int16_t (*coeff)[3][8] = (int16_t(*)[3][8]) coeff_buf;
278     int n;
279 
280     ff_colorspacedsp_init(&dsp);
281     for (n = 0; n < 8; n++) {
282         coeff[0][0][n] = lrint(0.85 * (1 << 14));
283         coeff[0][1][n] = lrint(0.10 * (1 << 14));
284         coeff[0][2][n] = lrint(0.05 * (1 << 14));
285         coeff[1][0][n] = lrint(-0.1 * (1 << 14));
286         coeff[1][1][n] = lrint(0.95 * (1 << 14));
287         coeff[1][2][n] = lrint(0.15 * (1 << 14));
288         coeff[2][0][n] = lrint(-0.2 * (1 << 14));
289         coeff[2][1][n] = lrint(0.30 * (1 << 14));
290         coeff[2][2][n] = lrint(0.90 * (1 << 14));
291     }
292     if (check_func(dsp.multiply3x3, "ff_colorspacedsp_multiply3x3")) {
293         randomize_buffers();
294         memcpy(dst1_y, dst0_y, W * H * sizeof(*dst1_y));
295         memcpy(dst1_u, dst0_u, W * H * sizeof(*dst1_u));
296         memcpy(dst1_v, dst0_v, W * H * sizeof(*dst1_v));
297         call_ref(dst0, W, W, H, coeff);
298         call_new(dst1, W, W, H, coeff);
299         if (memcmp(dst0[0], dst1[0], H * W * sizeof(*dst0_y)) ||
300             memcmp(dst0[1], dst1[1], H * W * sizeof(*dst0_u)) ||
301             memcmp(dst0[2], dst1[2], H * W * sizeof(*dst0_v))) {
302             fail();
303         }
304     }
305 
306     report("multiply3x3");
307 }
308 
/*
 * Entry point of the colorspace checks: runs the yuv2yuv, yuv2rgb, rgb2yuv
 * and multiply3x3 checks, in that order.
 */
void checkasm_check_colorspace(void)
{
    check_yuv2yuv();
    check_yuv2rgb();
    check_rgb2yuv();
    check_multiply3x3();
}
316