1 /*
2 * Copyright (c) 2016 Ronald S. Bultje <rsbultje@gmail.com>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <string.h>
22 #include "checkasm.h"
23 #include "libavfilter/colorspacedsp.h"
24 #include "libavutil/common.h"
25 #include "libavutil/internal.h"
26 #include "libavutil/intreadwrite.h"
27 #include "libavutil/mem_internal.h"
28
/* Dimensions, in pixels, of every test plane used by the checks below. */
enum {
    W = 64,
    H = 64,
};
31
/*
 * Fill the three byte-addressed source planes with random data, four bytes
 * at a time, masking each 32-bit word with bpp_mask[idepth].
 *
 * The macro reads the following names from the scope it is expanded in:
 *   idepth     - depth index (0 selects 1 byte per sample, otherwise 2)
 *   ss_w, ss_h - chroma subsampling shifts; planes 1 and 2 hold
 *                (W * H * bytes-per-sample) >> (ss_w + ss_h) bytes
 *   src        - array of three plane pointers
 */
#define randomize_buffers()                                      \
    do {                                                         \
        const unsigned sample_mask = bpp_mask[idepth];           \
        const int luma_bytes = W * H * (idepth ? 2 : 1);         \
        int comp, pos;                                           \
        for (comp = 0; comp < 3; comp++) {                       \
            const int shift = comp ? ss_w + ss_h : 0;            \
            const int comp_bytes = luma_bytes >> shift;          \
            for (pos = 0; pos < comp_bytes; pos += 4) {          \
                unsigned val = rnd() & sample_mask;              \
                AV_WN32A(&src[comp][pos], val);                  \
            }                                                    \
        }                                                        \
    } while (0)
47
/* Chroma-format suffix inserted into the test names, indexed by fmt. */
static const char *format_string[] = {
    [0] = "444",
    [1] = "422",
    [2] = "420",
};
51
/*
 * Mask applied to each random 32-bit word written by randomize_buffers(),
 * indexed by the input depth index.
 */
static const unsigned bpp_mask[] = {
    0xffffffff, /* index 0: every bit of each byte is kept          */
    0x03ff03ff, /* index 1: low 10 bits of each 16-bit half are kept */
    0x0fff0fff, /* index 2: low 12 bits of each 16-bit half are kept */
};
53
check_yuv2yuv(void)54 static void check_yuv2yuv(void)
55 {
56 declare_func(void, uint8_t *dst[3], ptrdiff_t dst_stride[3],
57 uint8_t *src[3], ptrdiff_t src_stride[3],
58 int w, int h, const int16_t coeff[3][3][8],
59 const int16_t off[2][8]);
60 ColorSpaceDSPContext dsp;
61 int idepth, odepth, fmt, n;
62 LOCAL_ALIGNED_32(uint8_t, src_y, [W * H * 2]);
63 LOCAL_ALIGNED_32(uint8_t, src_u, [W * H * 2]);
64 LOCAL_ALIGNED_32(uint8_t, src_v, [W * H * 2]);
65 uint8_t *src[3] = { src_y, src_u, src_v };
66 LOCAL_ALIGNED_32(uint8_t, dst0_y, [W * H * 2]);
67 LOCAL_ALIGNED_32(uint8_t, dst0_u, [W * H * 2]);
68 LOCAL_ALIGNED_32(uint8_t, dst0_v, [W * H * 2]);
69 LOCAL_ALIGNED_32(uint8_t, dst1_y, [W * H * 2]);
70 LOCAL_ALIGNED_32(uint8_t, dst1_u, [W * H * 2]);
71 LOCAL_ALIGNED_32(uint8_t, dst1_v, [W * H * 2]);
72 uint8_t *dst0[3] = { dst0_y, dst0_u, dst0_v }, *dst1[3] = { dst1_y, dst1_u, dst1_v };
73 LOCAL_ALIGNED_32(int16_t, offset_buf, [16]);
74 LOCAL_ALIGNED_32(int16_t, coeff_buf, [3 * 3 * 8]);
75 int16_t (*offset)[8] = (int16_t(*)[8]) offset_buf;
76 int16_t (*coeff)[3][8] = (int16_t(*)[3][8]) coeff_buf;
77
78 ff_colorspacedsp_init(&dsp);
79 for (n = 0; n < 8; n++) {
80 offset[0][n] = offset[1][n] = 16;
81
82 coeff[0][0][n] = (1 << 14) + (1 << 7) + 1;
83 coeff[0][1][n] = (1 << 7) - 1;
84 coeff[0][2][n] = -(1 << 8);
85 coeff[1][0][n] = coeff[2][0][n] = 0;
86 coeff[1][1][n] = (1 << 14) + (1 << 7);
87 coeff[1][2][n] = -(1 << 7);
88 coeff[2][2][n] = (1 << 14) - (1 << 6);
89 coeff[2][1][n] = 1 << 6;
90 }
91 for (idepth = 0; idepth < 3; idepth++) {
92 for (odepth = 0; odepth < 3; odepth++) {
93 for (fmt = 0; fmt < 3; fmt++) {
94 if (check_func(dsp.yuv2yuv[idepth][odepth][fmt],
95 "ff_colorspacedsp_yuv2yuv_%sp%dto%d",
96 format_string[fmt],
97 idepth * 2 + 8, odepth * 2 + 8)) {
98 int ss_w = !!fmt, ss_h = fmt == 2;
99 int y_src_stride = W << !!idepth, y_dst_stride = W << !!odepth;
100 int uv_src_stride = y_src_stride >> ss_w, uv_dst_stride = y_dst_stride >> ss_w;
101
102 randomize_buffers();
103 call_ref(dst0, (ptrdiff_t[3]) { y_dst_stride, uv_dst_stride, uv_dst_stride },
104 src, (ptrdiff_t[3]) { y_src_stride, uv_src_stride, uv_src_stride },
105 W, H, coeff, offset);
106 call_new(dst1, (ptrdiff_t[3]) { y_dst_stride, uv_dst_stride, uv_dst_stride },
107 src, (ptrdiff_t[3]) { y_src_stride, uv_src_stride, uv_src_stride },
108 W, H, coeff, offset);
109 if (memcmp(dst0[0], dst1[0], y_dst_stride * H) ||
110 memcmp(dst0[1], dst1[1], uv_dst_stride * H >> ss_h) ||
111 memcmp(dst0[2], dst1[2], uv_dst_stride * H >> ss_h)) {
112 fail();
113 }
114 }
115 }
116 }
117 }
118
119 report("yuv2yuv");
120 }
121
check_yuv2rgb(void)122 static void check_yuv2rgb(void)
123 {
124 declare_func(void, int16_t *dst[3], ptrdiff_t dst_stride,
125 uint8_t *src[3], ptrdiff_t src_stride[3],
126 int w, int h, const int16_t coeff[3][3][8],
127 const int16_t off[8]);
128 ColorSpaceDSPContext dsp;
129 int idepth, fmt, n;
130 LOCAL_ALIGNED_32(uint8_t, src_y, [W * H * 2]);
131 LOCAL_ALIGNED_32(uint8_t, src_u, [W * H * 2]);
132 LOCAL_ALIGNED_32(uint8_t, src_v, [W * H * 2]);
133 uint8_t *src[3] = { src_y, src_u, src_v };
134 LOCAL_ALIGNED_32(int16_t, dst0_y, [W * H]);
135 LOCAL_ALIGNED_32(int16_t, dst0_u, [W * H]);
136 LOCAL_ALIGNED_32(int16_t, dst0_v, [W * H]);
137 LOCAL_ALIGNED_32(int16_t, dst1_y, [W * H]);
138 LOCAL_ALIGNED_32(int16_t, dst1_u, [W * H]);
139 LOCAL_ALIGNED_32(int16_t, dst1_v, [W * H]);
140 int16_t *dst0[3] = { dst0_y, dst0_u, dst0_v }, *dst1[3] = { dst1_y, dst1_u, dst1_v };
141 LOCAL_ALIGNED_32(int16_t, offset, [8]);
142 LOCAL_ALIGNED_32(int16_t, coeff_buf, [3 * 3 * 8]);
143 int16_t (*coeff)[3][8] = (int16_t(*)[3][8]) coeff_buf;
144
145 ff_colorspacedsp_init(&dsp);
146 for (n = 0; n < 8; n++) {
147 offset[n] = 16;
148
149 coeff[0][0][n] = coeff[1][0][n] = coeff[2][0][n] = (1 << 14) | 1;
150 coeff[0][1][n] = coeff[2][2][n] = 0;
151 coeff[0][2][n] = 1 << 13;
152 coeff[1][1][n] = -(1 << 12);
153 coeff[1][2][n] = 1 << 12;
154 coeff[2][1][n] = 1 << 11;
155 }
156 for (idepth = 0; idepth < 3; idepth++) {
157 for (fmt = 0; fmt < 3; fmt++) {
158 if (check_func(dsp.yuv2rgb[idepth][fmt],
159 "ff_colorspacedsp_yuv2rgb_%sp%d",
160 format_string[fmt], idepth * 2 + 8)) {
161 int ss_w = !!fmt, ss_h = fmt == 2;
162 int y_src_stride = W << !!idepth;
163 int uv_src_stride = y_src_stride >> ss_w;
164
165 randomize_buffers();
166 call_ref(dst0, W, src,
167 (ptrdiff_t[3]) { y_src_stride, uv_src_stride, uv_src_stride },
168 W, H, coeff, offset);
169 call_new(dst1, W, src,
170 (ptrdiff_t[3]) { y_src_stride, uv_src_stride, uv_src_stride },
171 W, H, coeff, offset);
172 if (memcmp(dst0[0], dst1[0], W * H * sizeof(int16_t)) ||
173 memcmp(dst0[1], dst1[1], W * H * sizeof(int16_t)) ||
174 memcmp(dst0[2], dst1[2], W * H * sizeof(int16_t))) {
175 fail();
176 }
177 }
178 }
179 }
180
181 report("yuv2rgb");
182 }
183
#undef randomize_buffers
/*
 * Fill the first W * H elements of each of the three planes in src (taken
 * from the expanding scope) with random values.  Each value is a 15-bit
 * random number minus (32768 - 28672) >> 1, i.e. minus 2048.
 */
#define randomize_buffers()                                          \
    do {                                                             \
        int comp, idx;                                               \
        for (comp = 0; comp < 3; comp++) {                           \
            for (idx = 0; idx < W * H; idx++) {                      \
                int val = rnd() & 0x7fff;                            \
                src[comp][idx] = val - ((32768 - 28672) >> 1);       \
            }                                                        \
        }                                                            \
    } while (0)
198
check_rgb2yuv(void)199 static void check_rgb2yuv(void)
200 {
201 declare_func(void, uint8_t *dst[3], ptrdiff_t dst_stride[3],
202 int16_t *src[3], ptrdiff_t src_stride,
203 int w, int h, const int16_t coeff[3][3][8],
204 const int16_t off[8]);
205 ColorSpaceDSPContext dsp;
206 int odepth, fmt, n;
207 LOCAL_ALIGNED_32(int16_t, src_y, [W * H * 2]);
208 LOCAL_ALIGNED_32(int16_t, src_u, [W * H * 2]);
209 LOCAL_ALIGNED_32(int16_t, src_v, [W * H * 2]);
210 int16_t *src[3] = { src_y, src_u, src_v };
211 LOCAL_ALIGNED_32(uint8_t, dst0_y, [W * H * 2]);
212 LOCAL_ALIGNED_32(uint8_t, dst0_u, [W * H * 2]);
213 LOCAL_ALIGNED_32(uint8_t, dst0_v, [W * H * 2]);
214 LOCAL_ALIGNED_32(uint8_t, dst1_y, [W * H * 2]);
215 LOCAL_ALIGNED_32(uint8_t, dst1_u, [W * H * 2]);
216 LOCAL_ALIGNED_32(uint8_t, dst1_v, [W * H * 2]);
217 uint8_t *dst0[3] = { dst0_y, dst0_u, dst0_v }, *dst1[3] = { dst1_y, dst1_u, dst1_v };
218 LOCAL_ALIGNED_32(int16_t, offset, [8]);
219 LOCAL_ALIGNED_32(int16_t, coeff_buf, [3 * 3 * 8]);
220 int16_t (*coeff)[3][8] = (int16_t(*)[3][8]) coeff_buf;
221
222 ff_colorspacedsp_init(&dsp);
223 for (n = 0; n < 8; n++) {
224 offset[n] = 16;
225
226 // these somewhat resemble bt601/smpte170m coefficients
227 coeff[0][0][n] = lrint(0.3 * (1 << 14));
228 coeff[0][1][n] = lrint(0.6 * (1 << 14));
229 coeff[0][2][n] = lrint(0.1 * (1 << 14));
230 coeff[1][0][n] = lrint(-0.15 * (1 << 14));
231 coeff[1][1][n] = lrint(-0.35 * (1 << 14));
232 coeff[1][2][n] = lrint(0.5 * (1 << 14));
233 coeff[2][0][n] = lrint(0.5 * (1 << 14));
234 coeff[2][1][n] = lrint(-0.42 * (1 << 14));
235 coeff[2][2][n] = lrint(-0.08 * (1 << 14));
236 }
237 for (odepth = 0; odepth < 3; odepth++) {
238 for (fmt = 0; fmt < 3; fmt++) {
239 if (check_func(dsp.rgb2yuv[odepth][fmt],
240 "ff_colorspacedsp_rgb2yuv_%sp%d",
241 format_string[fmt], odepth * 2 + 8)) {
242 int ss_w = !!fmt, ss_h = fmt == 2;
243 int y_dst_stride = W << !!odepth;
244 int uv_dst_stride = y_dst_stride >> ss_w;
245
246 randomize_buffers();
247 call_ref(dst0, (ptrdiff_t[3]) { y_dst_stride, uv_dst_stride, uv_dst_stride },
248 src, W, W, H, coeff, offset);
249 call_new(dst1, (ptrdiff_t[3]) { y_dst_stride, uv_dst_stride, uv_dst_stride },
250 src, W, W, H, coeff, offset);
251 if (memcmp(dst0[0], dst1[0], H * y_dst_stride) ||
252 memcmp(dst0[1], dst1[1], H * uv_dst_stride >> ss_h) ||
253 memcmp(dst0[2], dst1[2], H * uv_dst_stride >> ss_h)) {
254 fail();
255 }
256 }
257 }
258 }
259
260 report("rgb2yuv");
261 }
262
check_multiply3x3(void)263 static void check_multiply3x3(void)
264 {
265 declare_func(void, int16_t *data[3], ptrdiff_t stride,
266 int w, int h, const int16_t coeff[3][3][8]);
267 ColorSpaceDSPContext dsp;
268 LOCAL_ALIGNED_32(int16_t, dst0_y, [W * H]);
269 LOCAL_ALIGNED_32(int16_t, dst0_u, [W * H]);
270 LOCAL_ALIGNED_32(int16_t, dst0_v, [W * H]);
271 LOCAL_ALIGNED_32(int16_t, dst1_y, [W * H]);
272 LOCAL_ALIGNED_32(int16_t, dst1_u, [W * H]);
273 LOCAL_ALIGNED_32(int16_t, dst1_v, [W * H]);
274 int16_t *dst0[3] = { dst0_y, dst0_u, dst0_v }, *dst1[3] = { dst1_y, dst1_u, dst1_v };
275 int16_t **src = dst0;
276 LOCAL_ALIGNED_32(int16_t, coeff_buf, [3 * 3 * 8]);
277 int16_t (*coeff)[3][8] = (int16_t(*)[3][8]) coeff_buf;
278 int n;
279
280 ff_colorspacedsp_init(&dsp);
281 for (n = 0; n < 8; n++) {
282 coeff[0][0][n] = lrint(0.85 * (1 << 14));
283 coeff[0][1][n] = lrint(0.10 * (1 << 14));
284 coeff[0][2][n] = lrint(0.05 * (1 << 14));
285 coeff[1][0][n] = lrint(-0.1 * (1 << 14));
286 coeff[1][1][n] = lrint(0.95 * (1 << 14));
287 coeff[1][2][n] = lrint(0.15 * (1 << 14));
288 coeff[2][0][n] = lrint(-0.2 * (1 << 14));
289 coeff[2][1][n] = lrint(0.30 * (1 << 14));
290 coeff[2][2][n] = lrint(0.90 * (1 << 14));
291 }
292 if (check_func(dsp.multiply3x3, "ff_colorspacedsp_multiply3x3")) {
293 randomize_buffers();
294 memcpy(dst1_y, dst0_y, W * H * sizeof(*dst1_y));
295 memcpy(dst1_u, dst0_u, W * H * sizeof(*dst1_u));
296 memcpy(dst1_v, dst0_v, W * H * sizeof(*dst1_v));
297 call_ref(dst0, W, W, H, coeff);
298 call_new(dst1, W, W, H, coeff);
299 if (memcmp(dst0[0], dst1[0], H * W * sizeof(*dst0_y)) ||
300 memcmp(dst0[1], dst1[1], H * W * sizeof(*dst0_u)) ||
301 memcmp(dst0[2], dst1[2], H * W * sizeof(*dst0_v))) {
302 fail();
303 }
304 }
305
306 report("multiply3x3");
307 }
308
checkasm_check_colorspace(void)309 void checkasm_check_colorspace(void)
310 {
311 check_yuv2yuv();
312 check_yuv2rgb();
313 check_rgb2yuv();
314 check_multiply3x3();
315 }
316