• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2018, VideoLAN and dav1d authors
3  * Copyright © 2018, Two Orioles, LLC
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  *    list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright notice,
13  *    this list of conditions and the following disclaimer in the documentation
14  *    and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "tests/checkasm/checkasm.h"
29 
30 #include <math.h>
31 
32 #include "src/itx.h"
33 #include "src/levels.h"
34 #include "src/scan.h"
35 #include "src/tables.h"
36 
37 #ifndef M_PI
38 #define M_PI 3.14159265358979323846
39 #endif
40 #ifndef M_SQRT1_2
41 #define M_SQRT1_2 0.707106781186547524401
42 #endif
43 
44 enum Tx1D { DCT, ADST, FLIPADST, IDENTITY, WHT };
45 
46 static const uint8_t itx_1d_types[N_TX_TYPES_PLUS_LL][2] = {
47     [DCT_DCT]           = { DCT,      DCT      },
48     [ADST_DCT]          = { DCT,      ADST     },
49     [DCT_ADST]          = { ADST,     DCT      },
50     [ADST_ADST]         = { ADST,     ADST     },
51     [FLIPADST_DCT]      = { DCT,      FLIPADST },
52     [DCT_FLIPADST]      = { FLIPADST, DCT      },
53     [FLIPADST_FLIPADST] = { FLIPADST, FLIPADST },
54     [ADST_FLIPADST]     = { FLIPADST, ADST     },
55     [FLIPADST_ADST]     = { ADST,     FLIPADST },
56     [IDTX]              = { IDENTITY, IDENTITY },
57     [V_DCT]             = { IDENTITY, DCT      },
58     [H_DCT]             = { DCT,      IDENTITY },
59     [V_ADST]            = { IDENTITY, ADST     },
60     [H_ADST]            = { ADST,     IDENTITY },
61     [V_FLIPADST]        = { IDENTITY, FLIPADST },
62     [H_FLIPADST]        = { FLIPADST, IDENTITY },
63     [WHT_WHT]           = { WHT,      WHT      },
64 };
65 
66 static const char *const itx_1d_names[5] = {
67     [DCT]      = "dct",
68     [ADST]     = "adst",
69     [FLIPADST] = "flipadst",
70     [IDENTITY] = "identity",
71     [WHT]      = "wht"
72 };
73 
74 static const double scaling_factors[9] = {
75     4.0000,             /*  4x4                          */
76     4.0000 * M_SQRT1_2, /*  4x8   8x4                    */
77     2.0000,             /*  4x16  8x8  16x4              */
78     2.0000 * M_SQRT1_2, /*        8x16 16x8              */
79     1.0000,             /*        8x32 16x16 32x8        */
80     0.5000 * M_SQRT1_2, /*             16x32 32x16       */
81     0.2500,             /*             16x64 32x32 64x16 */
82     0.1250 * M_SQRT1_2, /*                   32x64 64x32 */
83     0.0625,             /*                         64x64 */
84 };
85 
86 /* FIXME: Ensure that those forward transforms are similar to the real AV1
87  * transforms. The FLIPADST currently uses the ADST forward transform for
88  * example which is obviously "incorrect", but we're just using it for now
89  * since it does produce coefficients in the correct range at least. */
90 
91 /* DCT-II */
fdct_1d(double * const out,const double * const in,const int sz)92 static void fdct_1d(double *const out, const double *const in, const int sz) {
93     for (int i = 0; i < sz; i++) {
94         out[i] = 0.0;
95         for (int j = 0; j < sz; j++)
96             out[i] += in[j] * cos(M_PI * (2 * j + 1) * i / (sz * 2.0));
97     }
98     out[0] *= M_SQRT1_2;
99 }
100 
101 /* See "Towards jointly optimal spatial prediction and adaptive transform in
102  * video/image coding", by J. Han, A. Saxena, and K. Rose
103  * IEEE Proc. ICASSP, pp. 726-729, Mar. 2010.
104  * and "A Butterfly Structured Design of The Hybrid Transform Coding Scheme",
105  * by Jingning Han, Yaowu Xu, and Debargha Mukherjee
106  * http://research.google.com/pubs/archive/41418.pdf
107  */
fadst_1d(double * const out,const double * const in,const int sz)108 static void fadst_1d(double *const out, const double *const in, const int sz) {
109     for (int i = 0; i < sz; i++) {
110         out[i] = 0.0;
111         for (int j = 0; j < sz; j++)
112             out[i] += in[j] * sin(M_PI *
113             (sz == 4 ? (    j + 1) * (2 * i + 1) / (8.0 + 1.0) :
114                        (2 * j + 1) * (2 * i + 1) / (sz * 4.0)));
115     }
116 }
117 
fwht4_1d(double * const out,const double * const in)118 static void fwht4_1d(double *const out, const double *const in)
119 {
120     const double t0 = in[0] + in[1];
121     const double t3 = in[3] - in[2];
122     const double t4 = (t0 - t3) * 0.5;
123     const double t1 = t4 - in[1];
124     const double t2 = t4 - in[2];
125     out[0] = t0 - t2;
126     out[1] = t2;
127     out[2] = t3 + t1;
128     out[3] = t1;
129 }
130 
copy_subcoefs(coef * coeff,const enum RectTxfmSize tx,const enum TxfmType txtp,const int sw,const int sh,const int subsh)131 static int copy_subcoefs(coef *coeff,
132                          const enum RectTxfmSize tx, const enum TxfmType txtp,
133                          const int sw, const int sh, const int subsh)
134 {
135     /* copy the topleft coefficients such that the return value (being the
136      * coefficient scantable index for the eob token) guarantees that only
137      * the topleft $sub out of $sz (where $sz >= $sub) coefficients in both
138      * dimensions are non-zero. This leads to braching to specific optimized
139      * simd versions (e.g. dc-only) so that we get full asm coverage in this
140      * test */
141 
142     const enum TxClass tx_class = dav1d_tx_type_class[txtp];
143     const uint16_t *const scan = dav1d_scans[tx];
144     const int sub_high = subsh > 0 ? subsh * 8 - 1 : 0;
145     const int sub_low  = subsh > 1 ? sub_high - 8 : 0;
146     int n, eob;
147 
148     for (n = 0, eob = 0; n < sw * sh; n++) {
149         int rc, rcx, rcy;
150         if (tx_class == TX_CLASS_2D)
151             rc = scan[n], rcx = rc % sh, rcy = rc / sh;
152         else if (tx_class == TX_CLASS_H)
153             rcx = n % sh, rcy = n / sh, rc = n;
154         else /* tx_class == TX_CLASS_V */
155             rcx = n / sw, rcy = n % sw, rc = rcy * sh + rcx;
156 
157         /* Pick a random eob within this sub-itx */
158         if (rcx > sub_high || rcy > sub_high) {
159             break; /* upper boundary */
160         } else if (!eob && (rcx > sub_low || rcy > sub_low))
161             eob = n; /* lower boundary */
162     }
163 
164     if (eob)
165         eob += rnd() % (n - eob - 1);
166     if (tx_class == TX_CLASS_2D)
167         for (n = eob + 1; n < sw * sh; n++)
168             coeff[scan[n]] = 0;
169     else if (tx_class == TX_CLASS_H)
170         for (n = eob + 1; n < sw * sh; n++)
171             coeff[n] = 0;
172     else /* tx_class == TX_CLASS_V */ {
173         for (int rcx = eob / sw, rcy = eob % sw; rcx < sh; rcx++, rcy = -1)
174             while (++rcy < sw)
175                 coeff[rcy * sh + rcx] = 0;
176         n = sw * sh;
177     }
178     for (; n < 32 * 32; n++)
179         coeff[n] = rnd();
180     return eob;
181 }
182 
ftx(coef * const buf,const enum RectTxfmSize tx,const enum TxfmType txtp,const int w,const int h,const int subsh,const int bitdepth_max)183 static int ftx(coef *const buf, const enum RectTxfmSize tx,
184                const enum TxfmType txtp, const int w, const int h,
185                const int subsh, const int bitdepth_max)
186 {
187     double out[64 * 64], temp[64 * 64];
188     const double scale = scaling_factors[ctz(w * h) - 4];
189     const int sw = imin(w, 32), sh = imin(h, 32);
190 
191     for (int i = 0; i < h; i++) {
192         double in[64], temp_out[64];
193 
194         for (int i = 0; i < w; i++)
195             in[i] = (rnd() & (2 * bitdepth_max + 1)) - bitdepth_max;
196 
197         switch (itx_1d_types[txtp][0]) {
198         case DCT:
199             fdct_1d(temp_out, in, w);
200             break;
201         case ADST:
202         case FLIPADST:
203             fadst_1d(temp_out, in, w);
204             break;
205         case WHT:
206             fwht4_1d(temp_out, in);
207             break;
208         case IDENTITY:
209             memcpy(temp_out, in, w * sizeof(*temp_out));
210             break;
211         }
212 
213         for (int j = 0; j < w; j++)
214             temp[j * h + i] = temp_out[j] * scale;
215     }
216 
217     for (int i = 0; i < w; i++) {
218         switch (itx_1d_types[txtp][0]) {
219         case DCT:
220             fdct_1d(&out[i * h], &temp[i * h], h);
221             break;
222         case ADST:
223         case FLIPADST:
224             fadst_1d(&out[i * h], &temp[i * h], h);
225             break;
226         case WHT:
227             fwht4_1d(&out[i * h], &temp[i * h]);
228             break;
229         case IDENTITY:
230             memcpy(&out[i * h], &temp[i * h], h * sizeof(*out));
231             break;
232         }
233     }
234 
235     for (int y = 0; y < sh; y++)
236         for (int x = 0; x < sw; x++)
237             buf[y * sw + x] = (coef) (out[y * w + x] + 0.5);
238 
239     return copy_subcoefs(buf, tx, txtp, sw, sh, subsh);
240 }
241 
check_itxfm_add(Dav1dInvTxfmDSPContext * const c,const enum RectTxfmSize tx)242 static void check_itxfm_add(Dav1dInvTxfmDSPContext *const c,
243                             const enum RectTxfmSize tx)
244 {
245     ALIGN_STK_64(coef, coeff, 2, [32 * 32]);
246     PIXEL_RECT(c_dst, 64, 64);
247     PIXEL_RECT(a_dst, 64, 64);
248 
249     static const uint8_t subsh_iters[5] = { 2, 2, 3, 5, 5 };
250 
251     const int w = dav1d_txfm_dimensions[tx].w * 4;
252     const int h = dav1d_txfm_dimensions[tx].h * 4;
253     const int subsh_max = subsh_iters[imax(dav1d_txfm_dimensions[tx].lw,
254                                            dav1d_txfm_dimensions[tx].lh)];
255 #if BITDEPTH == 16
256     const int bpc_min = 10, bpc_max = 12;
257 #else
258     const int bpc_min = 8, bpc_max = 8;
259 #endif
260 
261     declare_func(void, pixel *dst, ptrdiff_t dst_stride, coef *coeff,
262                  int eob HIGHBD_DECL_SUFFIX);
263 
264     for (int bpc = bpc_min; bpc <= bpc_max; bpc += 2) {
265         bitfn(dav1d_itx_dsp_init)(c, bpc);
266         for (enum TxfmType txtp = 0; txtp < N_TX_TYPES_PLUS_LL; txtp++)
267             for (int subsh = 0; subsh < subsh_max; subsh++)
268                 if (check_func(c->itxfm_add[tx][txtp],
269                                "inv_txfm_add_%dx%d_%s_%s_%d_%dbpc",
270                                w, h, itx_1d_names[itx_1d_types[txtp][0]],
271                                itx_1d_names[itx_1d_types[txtp][1]], subsh,
272                                bpc))
273                 {
274                     const int bitdepth_max = (1 << bpc) - 1;
275                     const int eob = ftx(coeff[0], tx, txtp, w, h, subsh, bitdepth_max);
276                     memcpy(coeff[1], coeff[0], sizeof(*coeff));
277 
278                     CLEAR_PIXEL_RECT(c_dst);
279                     CLEAR_PIXEL_RECT(a_dst);
280 
281                     for (int y = 0; y < h; y++)
282                         for (int x = 0; x < w; x++)
283                             c_dst[y*PXSTRIDE(c_dst_stride) + x] =
284                             a_dst[y*PXSTRIDE(a_dst_stride) + x] = rnd() & bitdepth_max;
285 
286                     call_ref(c_dst, c_dst_stride, coeff[0], eob
287                              HIGHBD_TAIL_SUFFIX);
288                     call_new(a_dst, a_dst_stride, coeff[1], eob
289                              HIGHBD_TAIL_SUFFIX);
290 
291                     checkasm_check_pixel_padded(c_dst, c_dst_stride,
292                                                 a_dst, a_dst_stride,
293                                                 w, h, "dst");
294                     if (memcmp(coeff[0], coeff[1], sizeof(*coeff)))
295                         fail();
296 
297                     bench_new(alternate(c_dst, a_dst), a_dst_stride,
298                               alternate(coeff[0], coeff[1]), eob HIGHBD_TAIL_SUFFIX);
299                 }
300     }
301     report("add_%dx%d", w, h);
302 }
303 
bitfn(checkasm_check_itx)304 void bitfn(checkasm_check_itx)(void) {
305     static const uint8_t txfm_size_order[N_RECT_TX_SIZES] = {
306         TX_4X4,   RTX_4X8,  RTX_4X16,
307         RTX_8X4,  TX_8X8,   RTX_8X16,  RTX_8X32,
308         RTX_16X4, RTX_16X8, TX_16X16,  RTX_16X32, RTX_16X64,
309                   RTX_32X8, RTX_32X16, TX_32X32,  RTX_32X64,
310                             RTX_64X16, RTX_64X32, TX_64X64
311     };
312 
313     /* Zero unused function pointer elements. */
314     Dav1dInvTxfmDSPContext c = { { { 0 } } };
315 
316     for (int i = 0; i < N_RECT_TX_SIZES; i++)
317         check_itxfm_add(&c, txfm_size_order[i]);
318 }
319