/*
 * Copyright (c) 2015 Henrik Gramner
 * Copyright (c) 2021 Josh Dekker
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#include <string.h>
#include "checkasm.h"
#include "libavcodec/hevcdsp.h"
#include "libavutil/common.h"
#include "libavutil/internal.h"
#include "libavutil/intreadwrite.h"

/*
 * Masks applied to random 32-bit words, indexed by (bit_depth - 8).
 * pixel_mask limits source data: all bits are kept for 8-bit, and each
 * 16-bit lane is limited to bit_depth bits for 9..12-bit.
 */
static const uint32_t pixel_mask[] = { 0xffffffff, 0x01ff01ff, 0x03ff03ff, 0x07ff07ff, 0x0fff0fff };
/* Same idea for the int16_t second-source buffers: every 16-bit lane is limited to bit_depth bits. */
static const uint32_t pixel_mask16[] = { 0x00ff00ff, 0x01ff01ff, 0x03ff03ff, 0x07ff07ff, 0x0fff0fff };
/* Block dimensions, indexed by the first index of the HEVCDSPContext function tables; index 0 is not tested. */
static const int sizes[] = { -1, 4, 6, 8, 12, 16, 24, 32, 48, 64 };
/* Weighted-prediction parameter sets; each list is terminated by -1. */
static const int weights[] = { 0, 128, 255, -1 };
static const int denoms[] = { 0, 7, 12, -1 };
static const int offsets[] = { 0, 255, -1 };

/* Bytes per pixel for the bit depth currently held in the local variable bit_depth. */
#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
/* Element count of every test buffer declared by the test functions. */
#define BUF_SIZE (2 * MAX_PB_SIZE * (2 * 4 + MAX_PB_SIZE))

/*
 * Fill buf0/buf1 with identical random data masked to the current
 * bit_depth, and dst0/dst1 with identical unmasked random data.
 * Expects buf0, buf1, dst0, dst1 and bit_depth to be in scope.
 */
#define randomize_buffers()                          \
    do {                                             \
        uint32_t mask = pixel_mask[bit_depth - 8];   \
        int k;                                       \
        for (k = 0; k < BUF_SIZE; k += 4) {          \
            uint32_t r = rnd() & mask;               \
            AV_WN32A(buf0 + k, r);                   \
            AV_WN32A(buf1 + k, r);                   \
            r = rnd();                               \
            AV_WN32A(dst0 + k, r);                   \
            AV_WN32A(dst1 + k, r);                   \
        }                                            \
    } while (0)

/*
 * randomize_buffers() followed by filling ref0/ref1 with identical masked
 * random data, two array elements per 32-bit store.
 * The whole expansion is a single do/while(0) statement so the macro is
 * safe to use as the body of an unbraced if/else; the order of rnd()
 * calls is the same as running the two steps back to back.
 */
#define randomize_buffers_ref()                          \
    do {                                                 \
        uint32_t mask16 = pixel_mask16[bit_depth - 8];   \
        int k16;                                         \
        randomize_buffers();                             \
        for (k16 = 0; k16 < BUF_SIZE; k16 += 2) {        \
            uint32_t r = rnd() & mask16;                 \
            AV_WN32A(ref0 + k16, r);                     \
            AV_WN32A(ref1 + k16, r);                     \
        }                                                \
    } while (0)

#define src0 (buf0 + 2 * 4 * MAX_PB_SIZE) /* hevc qpel functions read data from negative src pointer offsets */
#define src1 (buf1 + 2 * 4 * MAX_PB_SIZE)

checkasm_check_hevc_qpel(void)68 void checkasm_check_hevc_qpel(void)
69 {
70     LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
71     LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
72     LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
73     LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
74 
75     HEVCDSPContext h;
76     int size, bit_depth, i, j, row;
77     declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, int16_t *dst, uint8_t *src, ptrdiff_t srcstride,
78                                                                   int height, intptr_t mx, intptr_t my, int width);
79 
80     for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
81         ff_hevc_dsp_init(&h, bit_depth);
82 
83         for (i = 0; i < 2; i++) {
84             for (j = 0; j < 2; j++) {
85                 for (size = 1; size < 10; size++) {
86                     const char *type;
87                     switch ((j << 1) | i) {
88                     case 0: type = "pel_pixels"; break; // 0 0
89                     case 1: type = "qpel_h"; break; // 0 1
90                     case 2: type = "qpel_v"; break; // 1 0
91                     case 3: type = "qpel_hv"; break; // 1 1
92                     }
93 
94                     if (check_func(h.put_hevc_qpel[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
95                         int16_t *dstw0 = (int16_t *) dst0, *dstw1 = (int16_t *) dst1;
96                         randomize_buffers();
97                         call_ref(dstw0, src0, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
98                         call_new(dstw1, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
99                         for (row = 0; row < size[sizes]; row++) {
100                             if (memcmp(dstw0 + row * MAX_PB_SIZE, dstw1 + row * MAX_PB_SIZE, sizes[size] * SIZEOF_PIXEL))
101                                 fail();
102                         }
103                         bench_new(dstw1, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
104                     }
105                 }
106             }
107         }
108     }
109     report("qpel");
110 }
111 
checkasm_check_hevc_qpel_uni(void)112 void checkasm_check_hevc_qpel_uni(void)
113 {
114     LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
115     LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
116     LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
117     LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
118 
119     HEVCDSPContext h;
120     int size, bit_depth, i, j;
121     declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
122                                                                   int height, intptr_t mx, intptr_t my, int width);
123 
124     for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
125         ff_hevc_dsp_init(&h, bit_depth);
126 
127         for (i = 0; i < 2; i++) {
128             for (j = 0; j < 2; j++) {
129                 for (size = 1; size < 10; size++) {
130                     const char *type;
131                     switch ((j << 1) | i) {
132                     case 0: type = "pel_uni_pixels"; break; // 0 0
133                     case 1: type = "qpel_uni_h"; break; // 0 1
134                     case 2: type = "qpel_uni_v"; break; // 1 0
135                     case 3: type = "qpel_uni_hv"; break; // 1 1
136                     }
137 
138                     if (check_func(h.put_hevc_qpel_uni[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
139                         randomize_buffers();
140                         call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
141                         call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
142                         if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL))
143                             fail();
144                         bench_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
145                     }
146                 }
147             }
148         }
149     }
150     report("qpel_uni");
151 }
152 
checkasm_check_hevc_qpel_uni_w(void)153 void checkasm_check_hevc_qpel_uni_w(void)
154 {
155     LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
156     LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
157     LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
158     LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
159 
160     HEVCDSPContext h;
161     int size, bit_depth, i, j;
162     const int *denom, *wx, *ox;
163     declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
164                                                                   int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width);
165 
166     for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
167         ff_hevc_dsp_init(&h, bit_depth);
168 
169         for (i = 0; i < 2; i++) {
170             for (j = 0; j < 2; j++) {
171                 for (size = 1; size < 10; size++) {
172                     const char *type;
173                     switch ((j << 1) | i) {
174                     case 0: type = "pel_uni_w_pixels"; break; // 0 0
175                     case 1: type = "qpel_uni_w_h"; break; // 0 1
176                     case 2: type = "qpel_uni_w_v"; break; // 1 0
177                     case 3: type = "qpel_uni_w_hv"; break; // 1 1
178                     }
179 
180                     if (check_func(h.put_hevc_qpel_uni_w[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
181                         for (denom = denoms; *denom >= 0; denom++) {
182                             for (wx = weights; *wx >= 0; wx++) {
183                                 for (ox = offsets; *ox >= 0; ox++) {
184                                     randomize_buffers();
185                                     call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, sizes[size], *denom, *wx, *ox, i, j, sizes[size]);
186                                     call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], *denom, *wx, *ox, i, j, sizes[size]);
187                                     if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL))
188                                         fail();
189                                     bench_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], *denom, *wx, *ox, i, j, sizes[size]);
190                                 }
191                             }
192                         }
193                     }
194                 }
195             }
196         }
197     }
198     report("qpel_uni_w");
199 }
200 
checkasm_check_hevc_qpel_bi(void)201 void checkasm_check_hevc_qpel_bi(void)
202 {
203     LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
204     LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
205     LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
206     LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
207     LOCAL_ALIGNED_32(int16_t, ref0, [BUF_SIZE]);
208     LOCAL_ALIGNED_32(int16_t, ref1, [BUF_SIZE]);
209 
210     HEVCDSPContext h;
211     int size, bit_depth, i, j;
212     declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
213                                                                   int16_t *src2,
214                                                                   int height, intptr_t mx, intptr_t my, int width);
215 
216     for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
217         ff_hevc_dsp_init(&h, bit_depth);
218 
219         for (i = 0; i < 2; i++) {
220             for (j = 0; j < 2; j++) {
221                 for (size = 1; size < 10; size++) {
222                     const char *type;
223                     switch ((j << 1) | i) {
224                     case 0: type = "pel_bi_pixels"; break; // 0 0
225                     case 1: type = "qpel_bi_h"; break; // 0 1
226                     case 2: type = "qpel_bi_v"; break; // 1 0
227                     case 3: type = "qpel_bi_hv"; break; // 1 1
228                     }
229 
230                     if (check_func(h.put_hevc_qpel_bi[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
231                         randomize_buffers_ref();
232                         call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, ref0, sizes[size], i, j, sizes[size]);
233                         call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], i, j, sizes[size]);
234                         if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL))
235                             fail();
236                         bench_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], i, j, sizes[size]);
237                     }
238                 }
239             }
240         }
241     }
242     report("qpel_bi");
243 }
244 
checkasm_check_hevc_qpel_bi_w(void)245 void checkasm_check_hevc_qpel_bi_w(void)
246 {
247     LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
248     LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
249     LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
250     LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
251     LOCAL_ALIGNED_32(int16_t, ref0, [BUF_SIZE]);
252     LOCAL_ALIGNED_32(int16_t, ref1, [BUF_SIZE]);
253 
254     HEVCDSPContext h;
255     int size, bit_depth, i, j;
256     const int *denom, *wx, *ox;
257     declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
258                                                                   int16_t *src2,
259                                                                   int height, int denom, int wx0, int wx1,
260                                                                   int ox0, int ox1, intptr_t mx, intptr_t my, int width);
261 
262     for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
263         ff_hevc_dsp_init(&h, bit_depth);
264 
265         for (i = 0; i < 2; i++) {
266             for (j = 0; j < 2; j++) {
267                 for (size = 1; size < 10; size++) {
268                     const char *type;
269                     switch ((j << 1) | i) {
270                     case 0: type = "pel_bi_w_pixels"; break; // 0 0
271                     case 1: type = "qpel_bi_w_h"; break; // 0 1
272                     case 2: type = "qpel_bi_w_v"; break; // 1 0
273                     case 3: type = "qpel_bi_w_hv"; break; // 1 1
274                     }
275 
276                     if (check_func(h.put_hevc_qpel_bi_w[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
277                         for (denom = denoms; *denom >= 0; denom++) {
278                             for (wx = weights; *wx >= 0; wx++) {
279                                 for (ox = offsets; *ox >= 0; ox++) {
280                                     randomize_buffers_ref();
281                                     call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, ref0, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]);
282                                     call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]);
283                                     if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL))
284                                         fail();
285                                     bench_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]);
286                                 }
287                             }
288                         }
289                     }
290                 }
291             }
292         }
293     }
294     report("qpel_bi_w");
295 }
296 
checkasm_check_hevc_epel(void)297 void checkasm_check_hevc_epel(void)
298 {
299     LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
300     LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
301     LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
302     LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
303 
304     HEVCDSPContext h;
305     int size, bit_depth, i, j, row;
306     declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, int16_t *dst, uint8_t *src, ptrdiff_t srcstride,
307                                                                   int height, intptr_t mx, intptr_t my, int width);
308 
309     for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
310         ff_hevc_dsp_init(&h, bit_depth);
311 
312         for (i = 0; i < 2; i++) {
313             for (j = 0; j < 2; j++) {
314                 for (size = 1; size < 10; size++) {
315                     const char *type;
316                     switch ((j << 1) | i) {
317                     case 0: type = "pel_pixels"; break; // 0 0
318                     case 1: type = "epel_h"; break; // 0 1
319                     case 2: type = "epel_v"; break; // 1 0
320                     case 3: type = "epel_hv"; break; // 1 1
321                     }
322 
323                     if (check_func(h.put_hevc_epel[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
324                         int16_t *dstw0 = (int16_t *) dst0, *dstw1 = (int16_t *) dst1;
325                         randomize_buffers();
326                         call_ref(dstw0, src0, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
327                         call_new(dstw1, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
328                         for (row = 0; row < size[sizes]; row++) {
329                             if (memcmp(dstw0 + row * MAX_PB_SIZE, dstw1 + row * MAX_PB_SIZE, sizes[size] * SIZEOF_PIXEL))
330                                 fail();
331                         }
332                         bench_new(dstw1, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
333                     }
334                 }
335             }
336         }
337     }
338     report("epel");
339 }
340 
checkasm_check_hevc_epel_uni(void)341 void checkasm_check_hevc_epel_uni(void)
342 {
343     LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
344     LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
345     LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
346     LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
347 
348     HEVCDSPContext h;
349     int size, bit_depth, i, j;
350     declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
351                                                                   int height, intptr_t mx, intptr_t my, int width);
352 
353     for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
354         ff_hevc_dsp_init(&h, bit_depth);
355 
356         for (i = 0; i < 2; i++) {
357             for (j = 0; j < 2; j++) {
358                 for (size = 1; size < 10; size++) {
359                     const char *type;
360                     switch ((j << 1) | i) {
361                     case 0: type = "pel_uni_pixels"; break; // 0 0
362                     case 1: type = "epel_uni_h"; break; // 0 1
363                     case 2: type = "epel_uni_v"; break; // 1 0
364                     case 3: type = "epel_uni_hv"; break; // 1 1
365                     }
366 
367                     if (check_func(h.put_hevc_epel_uni[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
368                         randomize_buffers();
369                         call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
370                         call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
371                         if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL))
372                             fail();
373                         bench_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
374                     }
375                 }
376             }
377         }
378     }
379     report("epel_uni");
380 }
381 
checkasm_check_hevc_epel_uni_w(void)382 void checkasm_check_hevc_epel_uni_w(void)
383 {
384     LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
385     LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
386     LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
387     LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
388 
389     HEVCDSPContext h;
390     int size, bit_depth, i, j;
391     const int *denom, *wx, *ox;
392     declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
393                                                                   int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width);
394 
395     for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
396         ff_hevc_dsp_init(&h, bit_depth);
397 
398         for (i = 0; i < 2; i++) {
399             for (j = 0; j < 2; j++) {
400                 for (size = 1; size < 10; size++) {
401                     const char *type;
402                     switch ((j << 1) | i) {
403                     case 0: type = "pel_uni_w_pixels"; break; // 0 0
404                     case 1: type = "epel_uni_w_h"; break; // 0 1
405                     case 2: type = "epel_uni_w_v"; break; // 1 0
406                     case 3: type = "epel_uni_w_hv"; break; // 1 1
407                     }
408 
409                     if (check_func(h.put_hevc_epel_uni_w[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
410                         for (denom = denoms; *denom >= 0; denom++) {
411                             for (wx = weights; *wx >= 0; wx++) {
412                                 for (ox = offsets; *ox >= 0; ox++) {
413                                     randomize_buffers();
414                                     call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, sizes[size], *denom, *wx, *ox, i, j, sizes[size]);
415                                     call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], *denom, *wx, *ox, i, j, sizes[size]);
416                                     if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL))
417                                         fail();
418                                     bench_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], *denom, *wx, *ox, i, j, sizes[size]);
419                                 }
420                             }
421                         }
422                     }
423                 }
424             }
425         }
426     }
427     report("epel_uni_w");
428 }
429 
checkasm_check_hevc_epel_bi(void)430 void checkasm_check_hevc_epel_bi(void)
431 {
432     LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
433     LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
434     LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
435     LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
436     LOCAL_ALIGNED_32(int16_t, ref0, [BUF_SIZE]);
437     LOCAL_ALIGNED_32(int16_t, ref1, [BUF_SIZE]);
438 
439     HEVCDSPContext h;
440     int size, bit_depth, i, j;
441     declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
442                                                                   int16_t *src2,
443                                                                   int height, intptr_t mx, intptr_t my, int width);
444 
445     for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
446         ff_hevc_dsp_init(&h, bit_depth);
447 
448         for (i = 0; i < 2; i++) {
449             for (j = 0; j < 2; j++) {
450                 for (size = 1; size < 10; size++) {
451                     const char *type;
452                     switch ((j << 1) | i) {
453                     case 0: type = "pel_bi_pixels"; break; // 0 0
454                     case 1: type = "epel_bi_h"; break; // 0 1
455                     case 2: type = "epel_bi_v"; break; // 1 0
456                     case 3: type = "epel_bi_hv"; break; // 1 1
457                     }
458 
459                     if (check_func(h.put_hevc_epel_bi[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
460                         randomize_buffers_ref();
461                         call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, ref0, sizes[size], i, j, sizes[size]);
462                         call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], i, j, sizes[size]);
463                         if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL))
464                             fail();
465                         bench_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], i, j, sizes[size]);
466                     }
467                 }
468             }
469         }
470     }
471     report("epel_bi");
472 }
473 
checkasm_check_hevc_epel_bi_w(void)474 void checkasm_check_hevc_epel_bi_w(void)
475 {
476     LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
477     LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
478     LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
479     LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
480     LOCAL_ALIGNED_32(int16_t, ref0, [BUF_SIZE]);
481     LOCAL_ALIGNED_32(int16_t, ref1, [BUF_SIZE]);
482 
483     HEVCDSPContext h;
484     int size, bit_depth, i, j;
485     const int *denom, *wx, *ox;
486     declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
487                                                                   int16_t *src2,
488                                                                   int height, int denom, int wx0, int wx1,
489                                                                   int ox0, int ox1, intptr_t mx, intptr_t my, int width);
490 
491     for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
492         ff_hevc_dsp_init(&h, bit_depth);
493 
494         for (i = 0; i < 2; i++) {
495             for (j = 0; j < 2; j++) {
496                 for (size = 1; size < 10; size++) {
497                     const char *type;
498                     switch ((j << 1) | i) {
499                     case 0: type = "pel_bi_w_pixels"; break; // 0 0
500                     case 1: type = "epel_bi_w_h"; break; // 0 1
501                     case 2: type = "epel_bi_w_v"; break; // 1 0
502                     case 3: type = "epel_bi_w_hv"; break; // 1 1
503                     }
504 
505                     if (check_func(h.put_hevc_epel_bi_w[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
506                         for (denom = denoms; *denom >= 0; denom++) {
507                             for (wx = weights; *wx >= 0; wx++) {
508                                 for (ox = offsets; *ox >= 0; ox++) {
509                                     randomize_buffers_ref();
510                                     call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, ref0, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]);
511                                     call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]);
512                                     if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL))
513                                         fail();
514                                     bench_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]);
515                                 }
516                             }
517                         }
518                     }
519                 }
520             }
521         }
522     }
523     report("epel_bi_w");
524 }
525