• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * VC-1 and WMV3 decoder - DSP functions
3  * Copyright (c) 2006 Konstantin Shishkov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * VC-1 and WMV3 decoder
25  */
26 
27 #include "config_components.h"
28 
29 #include "libavutil/avassert.h"
30 #include "libavutil/common.h"
31 #include "libavutil/intreadwrite.h"
32 #include "h264chroma.h"
33 #include "qpeldsp.h"
34 #include "rnd_avg.h"
35 #include "vc1dsp.h"
36 #include "startcode.h"
37 #include "vc1_common.h"
38 
/**
 * Apply the overlap transform across a horizontal edge, 8 columns wide.
 *
 * For every column the four samples at rows -2, -1, 0 and +1 relative to
 * @p src are read and rewritten. The two inner samples are clipped to
 * 0..255; the two outer ones are stored as computed.
 *
 * @param src    first of the 8 columns to process (row 0)
 * @param stride distance in bytes between vertically adjacent samples
 */
static void vc1_v_overlap_c(uint8_t *src, ptrdiff_t stride)
{
    int col;

    for (col = 0; col < 8; col++) {
        uint8_t *const px = src + col;
        /* Rounding term alternates 1, 0, 1, 0, ... from column to column. */
        const int rnd     = !(col & 1);
        const int far_up  = px[-2 * stride];
        const int near_up = px[-stride];
        const int near_dn = px[0];
        const int far_dn  = px[stride];
        const int outer   = (far_up - far_dn + 3 + rnd) >> 3;
        const int inner   = (far_up - far_dn + near_up - near_dn + 4 - rnd) >> 3;

        px[-2 * stride] = far_up - outer;
        px[-stride]     = av_clip_uint8(near_up - inner);
        px[0]           = av_clip_uint8(near_dn + inner);
        px[stride]      = far_dn + outer;
    }
}
62 
/**
 * Apply the overlap transform across a vertical edge, 8 rows high.
 *
 * For every row the four samples at columns -2, -1, 0 and +1 relative to
 * @p src are read and rewritten. The two inner samples are clipped to
 * 0..255; the two outer ones are stored as computed.
 *
 * @param src    first of the 8 rows to process (column 0)
 * @param stride distance in bytes between vertically adjacent samples
 */
static void vc1_h_overlap_c(uint8_t *src, ptrdiff_t stride)
{
    int row;

    for (row = 0; row < 8; row++) {
        uint8_t *const px = src + row * stride;
        /* Rounding term alternates 1, 0, 1, 0, ... from row to row. */
        const int rnd     = !(row & 1);
        const int far_l   = px[-2];
        const int near_l  = px[-1];
        const int near_r  = px[0];
        const int far_r   = px[1];
        const int outer   = (far_l - far_r + 3 + rnd) >> 3;
        const int inner   = (far_l - far_r + near_l - near_r + 4 - rnd) >> 3;

        px[-2] = far_l - outer;
        px[-1] = av_clip_uint8(near_l - inner);
        px[0]  = av_clip_uint8(near_r + inner);
        px[1]  = far_r + outer;
    }
}
86 
/**
 * Overlap transform on 16-bit coefficients across a horizontal edge.
 *
 * Works on 8 columns. For each column it rewrites elements 48 and 56 of
 * @p top and elements 0 and 8 of @p bottom (offset by the column index).
 * The two rounding constants are 4 and 3 and swap roles on every column.
 *
 * @param top    coefficient block above the edge
 * @param bottom coefficient block below the edge
 */
static void vc1_v_s_overlap_c(int16_t *top, int16_t *bottom)
{
    int col;

    for (col = 0; col < 8; col++) {
        /* Even columns round with (4, 3), odd columns with (3, 4). */
        const int rnd_hi = (col & 1) ? 3 : 4;
        const int rnd_lo = 7 - rnd_hi;
        const int t_far  = top[48 + col];
        const int t_near = top[56 + col];
        const int b_near = bottom[col];
        const int b_far  = bottom[8 + col];
        const int outer  = t_far - b_far;
        const int inner  = outer + t_near - b_near;

        top[48 + col]   = ((t_far  * 8) - outer + rnd_hi) >> 3;
        top[56 + col]   = ((t_near * 8) - inner + rnd_lo) >> 3;
        bottom[col]     = ((b_near * 8) + inner + rnd_hi) >> 3;
        bottom[8 + col] = ((b_far  * 8) + outer + rnd_lo) >> 3;
    }
}
112 
/**
 * Overlap transform on 16-bit coefficients across a vertical edge.
 *
 * Works on 8 rows. For each row it rewrites elements 6 and 7 of @p left and
 * elements 0 and 1 of @p right.
 *
 * @param left         coefficient block left of the edge
 * @param right        coefficient block right of the edge
 * @param left_stride  distance (in elements) between rows of @p left
 * @param right_stride distance (in elements) between rows of @p right
 * @param flags        bit 1 selects the initial rounding pair (3/4 when set,
 *                     4/3 otherwise); bit 0 makes the pair swap on every row
 */
static void vc1_h_s_overlap_c(int16_t *left, int16_t *right, ptrdiff_t left_stride, ptrdiff_t right_stride, int flags)
{
    const int alternate = flags & 1;
    int rnd_hi = (flags & 2) ? 3 : 4;
    int row;

    for (row = 0; row < 8; row++) {
        int16_t *const l = left  + row * left_stride;
        int16_t *const r = right + row * right_stride;
        /* The second rounding constant is always the complement to 7. */
        const int rnd_lo = 7 - rnd_hi;
        const int l_far  = l[6];
        const int l_near = l[7];
        const int r_near = r[0];
        const int r_far  = r[1];
        const int outer  = l_far - r_far;
        const int inner  = outer + l_near - r_near;

        l[6] = ((l_far  * 8) - outer + rnd_hi) >> 3;
        l[7] = ((l_near * 8) - inner + rnd_lo) >> 3;
        r[0] = ((r_near * 8) + inner + rnd_hi) >> 3;
        r[1] = ((r_far  * 8) + outer + rnd_lo) >> 3;

        if (alternate)
            rnd_hi = 7 - rnd_hi;
    }
}
141 
142 /**
143  * VC-1 in-loop deblocking filter for one line
144  * @param src source block type
145  * @param stride block stride
146  * @param pq block quantizer
147  * @return whether other 3 pairs should be filtered or not
148  * @see 8.6
149  */
vc1_filter_line(uint8_t * src,ptrdiff_t stride,int pq)150 static av_always_inline int vc1_filter_line(uint8_t *src, ptrdiff_t stride, int pq)
151 {
152     int a0 = (2 * (src[-2 * stride] - src[1 * stride]) -
153               5 * (src[-1 * stride] - src[0 * stride]) + 4) >> 3;
154     int a0_sign = a0 >> 31;        /* Store sign */
155 
156     a0 = (a0 ^ a0_sign) - a0_sign; /* a0 = FFABS(a0); */
157     if (a0 < pq) {
158         int a1 = FFABS((2 * (src[-4 * stride] - src[-1 * stride]) -
159                         5 * (src[-3 * stride] - src[-2 * stride]) + 4) >> 3);
160         int a2 = FFABS((2 * (src[ 0 * stride] - src[ 3 * stride]) -
161                         5 * (src[ 1 * stride] - src[ 2 * stride]) + 4) >> 3);
162         if (a1 < a0 || a2 < a0) {
163             int clip      = src[-1 * stride] - src[0 * stride];
164             int clip_sign = clip >> 31;
165 
166             clip = ((clip ^ clip_sign) - clip_sign) >> 1;
167             if (clip) {
168                 int a3     = FFMIN(a1, a2);
169                 int d      = 5 * (a3 - a0);
170                 int d_sign = (d >> 31);
171 
172                 d       = ((d ^ d_sign) - d_sign) >> 3;
173                 d_sign ^= a0_sign;
174 
175                 if (d_sign ^ clip_sign)
176                     d = 0;
177                 else {
178                     d = FFMIN(d, clip);
179                     d = (d ^ d_sign) - d_sign; /* Restore sign */
180                     src[-1 * stride] = av_clip_uint8(src[-1 * stride] - d);
181                     src[ 0 * stride] = av_clip_uint8(src[ 0 * stride] + d);
182                 }
183                 return 1;
184             }
185         }
186     }
187     return 0;
188 }
189 
/**
 * VC-1 in-loop deblocking filter for one edge.
 *
 * The edge is processed in groups of 4 lines. In each group the third line
 * is filtered first; the remaining three lines are filtered only if that
 * call reports that the filter decision passed.
 *
 * @param src    first sample past the edge on the first line
 * @param step   distance between consecutive lines along the edge; this is
 *               a ptrdiff_t because callers pass the picture stride here
 *               when filtering a vertical edge
 * @param stride distance between consecutive samples across the edge
 * @param len    edge length to filter (4, 8 or 16 lines)
 * @param pq     block quantizer
 * @see 8.6
 */
static inline void vc1_loop_filter(uint8_t *src, ptrdiff_t step, ptrdiff_t stride,
                                   int len, int pq)
{
    int i;

    for (i = 0; i < len; i += 4) {
        /* Line 2 of the group decides for the whole group. */
        if (vc1_filter_line(src + 2 * step, stride, pq)) {
            vc1_filter_line(src + 0 * step, stride, pq);
            vc1_filter_line(src + 1 * step, stride, pq);
            vc1_filter_line(src + 3 * step, stride, pq);
        }
        src += step * 4;
    }
}
215 
/*
 * Loop-filter entry points for edges of 4, 8 and 16 samples.
 *
 * The "v" variant walks along the edge one sample at a time and reaches
 * across it in units of stride; the "h" variant walks along the edge in
 * units of stride and reaches across it one sample at a time.
 */
#define VC1_LOOP_FILTER_PAIR(LEN)                                             \
static void vc1_v_loop_filter ## LEN ## _c(uint8_t *src, ptrdiff_t stride,    \
                                           int pq)                            \
{                                                                             \
    vc1_loop_filter(src, 1, stride, LEN, pq);                                 \
}                                                                             \
                                                                              \
static void vc1_h_loop_filter ## LEN ## _c(uint8_t *src, ptrdiff_t stride,    \
                                           int pq)                            \
{                                                                             \
    vc1_loop_filter(src, stride, 1, LEN, pq);                                 \
}

VC1_LOOP_FILTER_PAIR(4)
VC1_LOOP_FILTER_PAIR(8)
VC1_LOOP_FILTER_PAIR(16)
245 
/**
 * DC-only inverse transform of an 8x8 block, added to the destination.
 *
 * Only block[0] is read. It is scaled in two rounding steps and the result
 * is added, with clipping to 0..255, to all 64 samples of the 8x8 area.
 *
 * @param dest   top-left sample of the 8x8 destination area
 * @param stride distance in bytes between destination rows
 * @param block  coefficient block; only element 0 is used
 */
static void vc1_inv_trans_8x8_dc_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    int x, y;
    int dc = block[0];

    dc = (3 * dc +  1) >> 1;
    dc = (3 * dc + 16) >> 5;

    for (y = 0; y < 8; y++) {
        uint8_t *const line = dest + y * stride;

        for (x = 0; x < 8; x++)
            line[x] = av_clip_uint8(line[x] + dc);
    }
}
267 
/**
 * In-place inverse transform of an 8x8 coefficient block.
 *
 * Two 8-point passes are run. The first reads each column of @p block and
 * writes the result as a row of a temporary buffer (rounding 4, shift 3).
 * The second reads each column of that buffer and writes it back as a
 * column of @p block (rounding 64, shift 7, with an extra +1 on the last
 * four outputs).
 *
 * @param block 64 coefficients, 8 per row; overwritten with the result
 */
static void vc1_inv_trans_8x8_c(int16_t block[64])
{
    int16_t temp[64];
    int n;

    /* First pass: column n of block -> row n of temp. */
    for (n = 0; n < 8; n++) {
        const int16_t *in  = block + n;
        int16_t       *out = temp + 8 * n;

        const int e_sum = 12 * (in[ 0] + in[32]) + 4;
        const int e_dif = 12 * (in[ 0] - in[32]) + 4;
        const int e_hi  = 16 * in[16] +  6 * in[48];
        const int e_lo  =  6 * in[16] - 16 * in[48];

        const int even0 = e_sum + e_hi;
        const int even1 = e_dif + e_lo;
        const int even2 = e_dif - e_lo;
        const int even3 = e_sum - e_hi;

        const int odd0 = 16 * in[ 8] + 15 * in[24] +  9 * in[40] +  4 * in[56];
        const int odd1 = 15 * in[ 8] -  4 * in[24] - 16 * in[40] -  9 * in[56];
        const int odd2 =  9 * in[ 8] - 16 * in[24] +  4 * in[40] + 15 * in[56];
        const int odd3 =  4 * in[ 8] -  9 * in[24] + 15 * in[40] - 16 * in[56];

        out[0] = (even0 + odd0) >> 3;
        out[1] = (even1 + odd1) >> 3;
        out[2] = (even2 + odd2) >> 3;
        out[3] = (even3 + odd3) >> 3;
        out[4] = (even3 - odd3) >> 3;
        out[5] = (even2 - odd2) >> 3;
        out[6] = (even1 - odd1) >> 3;
        out[7] = (even0 - odd0) >> 3;
    }

    /* Second pass: column n of temp -> column n of block. */
    for (n = 0; n < 8; n++) {
        const int16_t *in  = temp + n;
        int16_t       *out = block + n;

        const int e_sum = 12 * (in[ 0] + in[32]) + 64;
        const int e_dif = 12 * (in[ 0] - in[32]) + 64;
        const int e_hi  = 16 * in[16] +  6 * in[48];
        const int e_lo  =  6 * in[16] - 16 * in[48];

        const int even0 = e_sum + e_hi;
        const int even1 = e_dif + e_lo;
        const int even2 = e_dif - e_lo;
        const int even3 = e_sum - e_hi;

        const int odd0 = 16 * in[ 8] + 15 * in[24] +  9 * in[40] +  4 * in[56];
        const int odd1 = 15 * in[ 8] -  4 * in[24] - 16 * in[40] -  9 * in[56];
        const int odd2 =  9 * in[ 8] - 16 * in[24] +  4 * in[40] + 15 * in[56];
        const int odd3 =  4 * in[ 8] -  9 * in[24] + 15 * in[40] - 16 * in[56];

        out[ 0] = (even0 + odd0) >> 7;
        out[ 8] = (even1 + odd1) >> 7;
        out[16] = (even2 + odd2) >> 7;
        out[24] = (even3 + odd3) >> 7;
        /* The lower half carries an additional +1 before the shift. */
        out[32] = (even3 - odd3 + 1) >> 7;
        out[40] = (even2 - odd2 + 1) >> 7;
        out[48] = (even1 - odd1 + 1) >> 7;
        out[56] = (even0 - odd0 + 1) >> 7;
    }
}
336 
/**
 * DC-only inverse transform of an 8x4 (8 wide, 4 high) part of a block,
 * added to the destination.
 *
 * Only block[0] is read. It is scaled in two rounding steps and the result
 * is added, with clipping to 0..255, to 4 rows of 8 samples.
 *
 * @param dest   top-left sample of the destination area
 * @param stride distance in bytes between destination rows
 * @param block  coefficient block; only element 0 is used
 */
static void vc1_inv_trans_8x4_dc_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    int x, y;
    int dc = block[0];

    dc =  (3 * dc +  1) >> 1;
    dc = (17 * dc + 64) >> 7;

    for (y = 0; y < 4; y++) {
        uint8_t *const line = dest + y * stride;

        for (x = 0; x < 8; x++)
            line[x] = av_clip_uint8(line[x] + dc);
    }
}
358 
/**
 * Inverse transform of an 8x4 (8 wide, 4 high) part of a block, added to
 * the destination.
 *
 * The first pass runs an 8-point transform over each of the first 4 rows of
 * @p block in place (rounding 4, shift 3). The second pass runs a 4-point
 * transform down each of the 8 columns (rounding 64, shift 7) and adds the
 * result to @p dest with clipping to 0..255.
 *
 * @param dest   top-left sample of the destination area
 * @param stride distance in bytes between destination rows
 * @param block  coefficients, 8 per row; the first 4 rows are overwritten
 */
static void vc1_inv_trans_8x4_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    int n;

    /* Horizontal 8-point pass, in place; all inputs are read before any
     * output of the row is stored. */
    for (n = 0; n < 4; n++) {
        int16_t *const row = block + 8 * n;

        const int e_sum = 12 * (row[0] + row[4]) + 4;
        const int e_dif = 12 * (row[0] - row[4]) + 4;
        const int e_hi  = 16 * row[2] +  6 * row[6];
        const int e_lo  =  6 * row[2] - 16 * row[6];

        const int even0 = e_sum + e_hi;
        const int even1 = e_dif + e_lo;
        const int even2 = e_dif - e_lo;
        const int even3 = e_sum - e_hi;

        const int odd0 = 16 * row[1] + 15 * row[3] +  9 * row[5] +  4 * row[7];
        const int odd1 = 15 * row[1] -  4 * row[3] - 16 * row[5] -  9 * row[7];
        const int odd2 =  9 * row[1] - 16 * row[3] +  4 * row[5] + 15 * row[7];
        const int odd3 =  4 * row[1] -  9 * row[3] + 15 * row[5] - 16 * row[7];

        row[0] = (even0 + odd0) >> 3;
        row[1] = (even1 + odd1) >> 3;
        row[2] = (even2 + odd2) >> 3;
        row[3] = (even3 + odd3) >> 3;
        row[4] = (even3 - odd3) >> 3;
        row[5] = (even2 - odd2) >> 3;
        row[6] = (even1 - odd1) >> 3;
        row[7] = (even0 - odd0) >> 3;
    }

    /* Vertical 4-point pass, accumulated into the picture. */
    for (n = 0; n < 8; n++) {
        const int16_t *const col = block + n;
        uint8_t *const out       = dest + n;

        const int sum   = 17 * (col[ 0] + col[16]) + 64;
        const int dif   = 17 * (col[ 0] - col[16]) + 64;
        const int rot_a = 22 * col[ 8] + 10 * col[24];
        const int rot_b = 22 * col[24] - 10 * col[ 8];

        out[0 * stride] = av_clip_uint8(out[0 * stride] + ((sum + rot_a) >> 7));
        out[1 * stride] = av_clip_uint8(out[1 * stride] + ((dif - rot_b) >> 7));
        out[2 * stride] = av_clip_uint8(out[2 * stride] + ((dif + rot_b) >> 7));
        out[3 * stride] = av_clip_uint8(out[3 * stride] + ((sum - rot_a) >> 7));
    }
}
413 
/**
 * DC-only inverse transform of a 4x8 (4 wide, 8 high) part of a block,
 * added to the destination.
 *
 * Only block[0] is read. It is scaled in two rounding steps and the result
 * is added, with clipping to 0..255, to 8 rows of 4 samples.
 *
 * @param dest   top-left sample of the destination area
 * @param stride distance in bytes between destination rows
 * @param block  coefficient block; only element 0 is used
 */
static void vc1_inv_trans_4x8_dc_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    int x, y;
    int dc = block[0];

    dc = (17 * dc +  4) >> 3;
    dc = (12 * dc + 64) >> 7;

    for (y = 0; y < 8; y++) {
        uint8_t *const line = dest + y * stride;

        for (x = 0; x < 4; x++)
            line[x] = av_clip_uint8(line[x] + dc);
    }
}
431 
/**
 * Inverse transform of a 4x8 (4 wide, 8 high) part of a block, added to
 * the destination.
 *
 * The first pass runs a 4-point transform over the first 4 elements of each
 * of the 8 rows of @p block in place (rounding 4, shift 3). The second pass
 * runs an 8-point transform down each of the 4 columns (rounding 64,
 * shift 7, extra +1 on the last four outputs) and adds the result to
 * @p dest with clipping to 0..255.
 *
 * @param dest   top-left sample of the destination area
 * @param stride distance in bytes between destination rows
 * @param block  coefficients, 8 per row; the first 4 of each row are
 *               overwritten
 */
static void vc1_inv_trans_4x8_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    int n;

    /* Horizontal 4-point pass, in place; all inputs are read before any
     * output of the row is stored. */
    for (n = 0; n < 8; n++) {
        int16_t *const row = block + 8 * n;

        const int sum   = 17 * (row[0] + row[2]) + 4;
        const int dif   = 17 * (row[0] - row[2]) + 4;
        const int rot_a = 22 * row[1] + 10 * row[3];
        const int rot_b = 22 * row[3] - 10 * row[1];

        row[0] = (sum + rot_a) >> 3;
        row[1] = (dif - rot_b) >> 3;
        row[2] = (dif + rot_b) >> 3;
        row[3] = (sum - rot_a) >> 3;
    }

    /* Vertical 8-point pass, accumulated into the picture. */
    for (n = 0; n < 4; n++) {
        const int16_t *const col = block + n;
        uint8_t *const out       = dest + n;

        const int e_sum = 12 * (col[ 0] + col[32]) + 64;
        const int e_dif = 12 * (col[ 0] - col[32]) + 64;
        const int e_hi  = 16 * col[16] +  6 * col[48];
        const int e_lo  =  6 * col[16] - 16 * col[48];

        const int even0 = e_sum + e_hi;
        const int even1 = e_dif + e_lo;
        const int even2 = e_dif - e_lo;
        const int even3 = e_sum - e_hi;

        const int odd0 = 16 * col[ 8] + 15 * col[24] +  9 * col[40] +  4 * col[56];
        const int odd1 = 15 * col[ 8] -  4 * col[24] - 16 * col[40] -  9 * col[56];
        const int odd2 =  9 * col[ 8] - 16 * col[24] +  4 * col[40] + 15 * col[56];
        const int odd3 =  4 * col[ 8] -  9 * col[24] + 15 * col[40] - 16 * col[56];

        out[0 * stride] = av_clip_uint8(out[0 * stride] + ((even0 + odd0)     >> 7));
        out[1 * stride] = av_clip_uint8(out[1 * stride] + ((even1 + odd1)     >> 7));
        out[2 * stride] = av_clip_uint8(out[2 * stride] + ((even2 + odd2)     >> 7));
        out[3 * stride] = av_clip_uint8(out[3 * stride] + ((even3 + odd3)     >> 7));
        out[4 * stride] = av_clip_uint8(out[4 * stride] + ((even3 - odd3 + 1) >> 7));
        out[5 * stride] = av_clip_uint8(out[5 * stride] + ((even2 - odd2 + 1) >> 7));
        out[6 * stride] = av_clip_uint8(out[6 * stride] + ((even1 - odd1 + 1) >> 7));
        out[7 * stride] = av_clip_uint8(out[7 * stride] + ((even0 - odd0 + 1) >> 7));
    }
}
486 
/**
 * DC-only inverse transform of a 4x4 part of a block, added to the
 * destination.
 *
 * Only block[0] is read. It is scaled in two rounding steps and the result
 * is added, with clipping to 0..255, to 4 rows of 4 samples.
 *
 * @param dest   top-left sample of the destination area
 * @param stride distance in bytes between destination rows
 * @param block  coefficient block; only element 0 is used
 */
static void vc1_inv_trans_4x4_dc_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    int x, y;
    int dc = block[0];

    dc = (17 * dc +  4) >> 3;
    dc = (17 * dc + 64) >> 7;

    for (y = 0; y < 4; y++) {
        uint8_t *const line = dest + y * stride;

        for (x = 0; x < 4; x++)
            line[x] = av_clip_uint8(line[x] + dc);
    }
}
504 
/**
 * Inverse transform of a 4x4 part of a block, added to the destination.
 *
 * The first pass runs a 4-point transform over the first 4 elements of each
 * of the first 4 rows of @p block in place (rounding 4, shift 3). The
 * second pass runs the same transform down each of the 4 columns
 * (rounding 64, shift 7) and adds the result to @p dest with clipping to
 * 0..255.
 *
 * @param dest   top-left sample of the destination area
 * @param stride distance in bytes between destination rows
 * @param block  coefficients, 8 per row; the 4x4 corner is overwritten
 */
static void vc1_inv_trans_4x4_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    int n;

    /* Horizontal pass, in place; all inputs are read before any output of
     * the row is stored. */
    for (n = 0; n < 4; n++) {
        int16_t *const row = block + 8 * n;

        const int sum   = 17 * (row[0] + row[2]) + 4;
        const int dif   = 17 * (row[0] - row[2]) + 4;
        const int rot_a = 22 * row[1] + 10 * row[3];
        const int rot_b = 22 * row[3] - 10 * row[1];

        row[0] = (sum + rot_a) >> 3;
        row[1] = (dif - rot_b) >> 3;
        row[2] = (dif + rot_b) >> 3;
        row[3] = (sum - rot_a) >> 3;
    }

    /* Vertical pass, accumulated into the picture. */
    for (n = 0; n < 4; n++) {
        const int16_t *const col = block + n;
        uint8_t *const out       = dest + n;

        const int sum   = 17 * (col[0] + col[16]) + 64;
        const int dif   = 17 * (col[0] - col[16]) + 64;
        const int rot_a = 22 * col[8] + 10 * col[24];
        const int rot_b = 22 * col[24] - 10 * col[8];

        out[0 * stride] = av_clip_uint8(out[0 * stride] + ((sum + rot_a) >> 7));
        out[1 * stride] = av_clip_uint8(out[1 * stride] + ((dif - rot_b) >> 7));
        out[2 * stride] = av_clip_uint8(out[2 * stride] + ((dif + rot_b) >> 7));
        out[3 * stride] = av_clip_uint8(out[3 * stride] + ((sum - rot_a) >> 7));
    }
}
544 
545 /* motion compensation functions */
546 
547 /* Filter in case of 2 filters */
548 #define VC1_MSPEL_FILTER_16B(DIR, TYPE)                                       \
549 static av_always_inline int vc1_mspel_ ## DIR ## _filter_16bits(const TYPE *src, \
550                                                                 int stride,   \
551                                                                 int mode)     \
552 {                                                                             \
553     switch(mode) {                                                            \
554     case 0: /* no shift - should not occur */                                 \
555         return 0;                                                             \
556     case 1: /* 1/4 shift */                                                   \
557         return -4 * src[-stride] + 53 * src[0] +                              \
558                18 * src[stride]  -  3 * src[stride * 2];                      \
559     case 2: /* 1/2 shift */                                                   \
560         return -1 * src[-stride] +  9 * src[0] +                              \
561                 9 * src[stride]  -  1 * src[stride * 2];                      \
562     case 3: /* 3/4 shift */                                                   \
563         return -3 * src[-stride] + 18 * src[0] +                              \
564                53 * src[stride]  -  4 * src[stride * 2];                      \
565     }                                                                         \
566     return 0; /* should not occur */                                          \
567 }
568 
VC1_MSPEL_FILTER_16B(ver,uint8_t)569 VC1_MSPEL_FILTER_16B(ver, uint8_t)
570 VC1_MSPEL_FILTER_16B(hor, int16_t)
571 
572 /* Filter used to interpolate fractional pel values */
573 static av_always_inline int vc1_mspel_filter(const uint8_t *src, int stride,
574                                              int mode, int r)
575 {
576     switch (mode) {
577     case 0: // no shift
578         return src[0];
579     case 1: // 1/4 shift
580         return (-4 * src[-stride] + 53 * src[0] +
581                 18 * src[stride]  -  3 * src[stride * 2] + 32 - r) >> 6;
582     case 2: // 1/2 shift
583         return (-1 * src[-stride] +  9 * src[0] +
584                  9 * src[stride]  -  1 * src[stride * 2] + 8 - r) >> 4;
585     case 3: // 3/4 shift
586         return (-3 * src[-stride] + 18 * src[0] +
587                 53 * src[stride]  -  4 * src[stride * 2] + 32 - r) >> 6;
588     }
589     return 0; // should not occur
590 }
591 
/**
 * Generate the bicubic motion-compensation kernels and the plain block
 * copy helpers for one store operation.
 *
 * OP     stores one filtered sample:  OP(destination lvalue, value)
 * OP4    stores four packed samples:  OP4(32-bit destination lvalue, value)
 * OPNAME prefix of every generated function name
 *
 * Generated functions:
 *   OPNAME vc1_mspel_mc_sq  shared kernel for a size x size block
 *   OPNAME vc1_mspel_mc     8x8 entry point
 *   OPNAME vc1_mspel_mc_16  16x16 entry point
 *   OPNAME pixels8x8_c      unfiltered 8x8 block store
 *   OPNAME pixels16x16_c    unfiltered 16x16 block store
 *
 * hmode and vmode select the filter phase in each direction (0 = none,
 * 1 = 1/4, 2 = 1/2, 3 = 3/4); rnd is the rounding control. The scratch
 * buffer of the shared kernel is dimensioned for the largest block (16).
 */
#define VC1_MSPEL_MC(OP, OP4, OPNAME)                                         \
static av_always_inline void OPNAME ## vc1_mspel_mc_sq(uint8_t *dst,          \
                                                       const uint8_t *src,    \
                                                       ptrdiff_t stride,      \
                                                       int hmode,             \
                                                       int vmode,             \
                                                       int rnd,               \
                                                       int size)              \
{                                                                             \
    int x, y;                                                                 \
                                                                              \
    if (vmode && hmode) {                                                     \
        /* Both directions: vertical pass into a 16-bit scratch buffer, */    \
        /* then horizontal pass from the scratch buffer into dst.       */    \
        static const int shift_value[] = { 0, 5, 1, 5 };                      \
        const int shift = (shift_value[hmode] + shift_value[vmode]) >> 1;     \
        /* One extra column on the left and two on the right feed the   */    \
        /* horizontal 4-tap filter.                                     */    \
        const int width = size + 3;                                           \
        int16_t tmp[19 * 16];                                                 \
        int16_t *line = tmp;                                                  \
        int r = (1 << (shift - 1)) + rnd - 1;                                 \
                                                                              \
        src -= 1;                                                             \
        for (y = 0; y < size; y++) {                                          \
            for (x = 0; x < width; x++)                                       \
                line[x] = (vc1_mspel_ver_filter_16bits(src + x, stride, vmode) + r) >> shift; \
            src  += stride;                                                   \
            line += width;                                                    \
        }                                                                     \
                                                                              \
        r    = 64 - rnd;                                                      \
        line = tmp + 1;                                                       \
        for (y = 0; y < size; y++) {                                          \
            for (x = 0; x < size; x++)                                        \
                OP(dst[x], (vc1_mspel_hor_filter_16bits(line + x, 1, hmode) + r) >> 7); \
            dst  += stride;                                                   \
            line += width;                                                    \
        }                                                                     \
        return;                                                               \
    }                                                                         \
                                                                              \
    if (vmode) {                                                              \
        /* Vertical filter only, straight into dst. */                        \
        const int r = 1 - rnd;                                                \
                                                                              \
        for (y = 0; y < size; y++) {                                          \
            for (x = 0; x < size; x++)                                        \
                OP(dst[x], vc1_mspel_filter(src + x, stride, vmode, r));      \
            src += stride;                                                    \
            dst += stride;                                                    \
        }                                                                     \
        return;                                                               \
    }                                                                         \
                                                                              \
    /* Horizontal filter only (or plain copy when hmode is 0 as well). */     \
    for (y = 0; y < size; y++) {                                              \
        for (x = 0; x < size; x++)                                            \
            OP(dst[x], vc1_mspel_filter(src + x, 1, hmode, rnd));             \
        dst += stride;                                                        \
        src += stride;                                                        \
    }                                                                         \
}                                                                             \
                                                                              \
static av_always_inline void OPNAME ## vc1_mspel_mc(uint8_t *dst,             \
                                                    const uint8_t *src,       \
                                                    ptrdiff_t stride,         \
                                                    int hmode,                \
                                                    int vmode,                \
                                                    int rnd)                  \
{                                                                             \
    OPNAME ## vc1_mspel_mc_sq(dst, src, stride, hmode, vmode, rnd, 8);        \
}                                                                             \
                                                                              \
static av_always_inline void OPNAME ## vc1_mspel_mc_16(uint8_t *dst,          \
                                                       const uint8_t *src,    \
                                                       ptrdiff_t stride,      \
                                                       int hmode,             \
                                                       int vmode,             \
                                                       int rnd)               \
{                                                                             \
    OPNAME ## vc1_mspel_mc_sq(dst, src, stride, hmode, vmode, rnd, 16);       \
}                                                                             \
                                                                              \
static void OPNAME ## pixels8x8_c(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int rnd){\
    int x, y;                                                                 \
                                                                              \
    for (y = 0; y < 8; y++) {                                                 \
        /* Four samples per store. */                                         \
        for (x = 0; x < 8; x += 4)                                            \
            OP4(*(uint32_t*)(block + x), AV_RN32(pixels + x));                \
        pixels += line_size;                                                  \
        block  += line_size;                                                  \
    }                                                                         \
}                                                                             \
                                                                              \
static void OPNAME ## pixels16x16_c(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int rnd){\
    int x, y;                                                                 \
                                                                              \
    for (y = 0; y < 16; y++) {                                                \
        /* Four samples per store. */                                         \
        for (x = 0; x < 16; x += 4)                                           \
            OP4(*(uint32_t*)(block + x), AV_RN32(pixels + x));                \
        pixels += line_size;                                                  \
        block  += line_size;                                                  \
    }                                                                         \
}
730 
731 #define op_put(a, b) (a) = av_clip_uint8(b)
732 #define op_avg(a, b) (a) = ((a) + av_clip_uint8(b) + 1) >> 1
733 #define op4_avg(a, b) (a) = rnd_avg32(a, b)
734 #define op4_put(a, b) (a) = (b)
735 
VC1_MSPEL_MC(op_put,op4_put,put_)736 VC1_MSPEL_MC(op_put, op4_put, put_)
737 VC1_MSPEL_MC(op_avg, op4_avg, avg_)
738 
739 /* pixel functions - really are entry points to vc1_mspel_mc */
740 
/*
 * Emit a single entry point called name that forwards its arguments to
 * impl with the mode pair (a, b) hard-coded.
 */
#define VC1_MSPEL_ENTRY(name, impl, a, b)                                     \
static void name(uint8_t *dst, const uint8_t *src,                            \
                 ptrdiff_t stride, int rnd)                                   \
{                                                                             \
    impl(dst, src, stride, a, b, rnd);                                        \
}

/*
 * For one mode pair (a, b) generate the four wrappers
 *   put_vc1_mspel_mc<a><b>_c      avg_vc1_mspel_mc<a><b>_c
 *   put_vc1_mspel_mc<a><b>_16_c   avg_vc1_mspel_mc<a><b>_16_c
 * The plain variants call {put,avg}_vc1_mspel_mc, the _16 variants call
 * {put,avg}_vc1_mspel_mc_16.
 * NOTE(review): a and b look like the horizontal and vertical filter modes
 * respectively - confirm against the vc1_mspel_mc signature.
 */
#define PUT_VC1_MSPEL(a, b)                                                   \
VC1_MSPEL_ENTRY(put_vc1_mspel_mc ## a ## b ## _c,                             \
                put_vc1_mspel_mc, a, b)                                       \
VC1_MSPEL_ENTRY(avg_vc1_mspel_mc ## a ## b ## _c,                             \
                avg_vc1_mspel_mc, a, b)                                       \
VC1_MSPEL_ENTRY(put_vc1_mspel_mc ## a ## b ## _16_c,                          \
                put_vc1_mspel_mc_16, a, b)                                    \
VC1_MSPEL_ENTRY(avg_vc1_mspel_mc ## a ## b ## _16_c,                          \
                avg_vc1_mspel_mc_16, a, b)

/*
 * All mode pairs except (0, 0): that slot is filled with the plain
 * pixels8x8 / pixels16x16 functions in ff_vc1dsp_init().
 */

/* b == 0 */
PUT_VC1_MSPEL(1, 0)
PUT_VC1_MSPEL(2, 0)
PUT_VC1_MSPEL(3, 0)

/* b == 1 */
PUT_VC1_MSPEL(0, 1)
PUT_VC1_MSPEL(1, 1)
PUT_VC1_MSPEL(2, 1)
PUT_VC1_MSPEL(3, 1)

/* b == 2 */
PUT_VC1_MSPEL(0, 2)
PUT_VC1_MSPEL(1, 2)
PUT_VC1_MSPEL(2, 2)
PUT_VC1_MSPEL(3, 2)

/* b == 3 */
PUT_VC1_MSPEL(0, 3)
PUT_VC1_MSPEL(1, 3)
PUT_VC1_MSPEL(2, 3)
PUT_VC1_MSPEL(3, 3)
785 
/*
 * Weighted sum of the 2x2 neighbourhood whose top-left sample is src[a].
 * Expects the weights A, B, C, D as well as src and stride to be in scope
 * where the macro is used.  The weights computed by the users below add up
 * to 64, hence the shift by 6; the bias added before the shift is 32 - 4.
 */
#define chroma_mc(a)                  \
    ((A * src[a]              +       \
      B * src[a + 1]          +       \
      C * src[stride + a]     +       \
      D * src[stride + a + 1] + 32 - 4) >> 6)

/**
 * Write h rows of 8 interpolated samples to dst.
 *
 * @param dst    destination, advanced by stride after every row
 * @param src    source; each row reads src[0..8] and src[stride..stride + 8]
 * @param stride distance in bytes between rows, shared by dst and src
 * @param h      number of rows to produce
 * @param x      horizontal weight selector, asserted to be in 0..7
 * @param y      vertical weight selector, asserted to be in 0..7
 */
static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst /* align 8 */,
                                        uint8_t *src /* align 1 */,
                                        ptrdiff_t stride, int h, int x, int y)
{
    /* Weights of the four neighbours; A + B + C + D == 64. */
    const int A = (8 - x) * (8 - y);
    const int B = (x)     * (8 - y);
    const int C = (8 - x) * (y);
    const int D = (x)     * (y);
    int row, col;

    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);

    for (row = 0; row < h; row++, dst += stride, src += stride)
        for (col = 0; col < 8; col++)
            dst[col] = chroma_mc(col);
}
814 
/**
 * Write h rows of 4 interpolated samples to dst.
 *
 * Same computation as the 8-wide variant, restricted to 4 columns: every
 * output is chroma_mc() of the matching column.
 *
 * @param dst    destination, advanced by stride after every row
 * @param src    source; each row reads src[0..4] and src[stride..stride + 4]
 * @param stride distance in bytes between rows, shared by dst and src
 * @param h      number of rows to produce
 * @param x      horizontal weight selector, asserted to be in 0..7
 * @param y      vertical weight selector, asserted to be in 0..7
 */
static void put_no_rnd_vc1_chroma_mc4_c(uint8_t *dst, uint8_t *src,
                                        ptrdiff_t stride, int h, int x, int y)
{
    /* Weights of the four neighbours; A + B + C + D == 64. */
    const int A = (8 - x) * (8 - y);
    const int B = (x)     * (8 - y);
    const int C = (8 - x) * (y);
    const int D = (x)     * (y);
    int row, col;

    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);

    for (row = 0; row < h; row++, dst += stride, src += stride)
        for (col = 0; col < 4; col++)
            dst[col] = chroma_mc(col);
}
835 
/* Average of a and b, rounding halves up. */
#define avg2(a, b) (((a) + (b) + 1) >> 1)

/**
 * Average h rows of 8 interpolated samples into dst.
 *
 * Every destination sample is replaced by avg2() of its current value and
 * the chroma_mc() result for the same column.
 *
 * @param dst    destination, read and written, advanced by stride per row
 * @param src    source; each row reads src[0..8] and src[stride..stride + 8]
 * @param stride distance in bytes between rows, shared by dst and src
 * @param h      number of rows to process
 * @param x      horizontal weight selector, asserted to be in 0..7
 * @param y      vertical weight selector, asserted to be in 0..7
 */
static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst /* align 8 */,
                                        uint8_t *src /* align 1 */,
                                        ptrdiff_t stride, int h, int x, int y)
{
    /* Weights of the four neighbours; A + B + C + D == 64. */
    const int A = (8 - x) * (8 - y);
    const int B = (x)     * (8 - y);
    const int C = (8 - x) * (y);
    const int D = (x)     * (y);
    int row, col;

    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);

    for (row = 0; row < h; row++, dst += stride, src += stride)
        for (col = 0; col < 8; col++)
            dst[col] = avg2(dst[col], chroma_mc(col));
}
862 
/**
 * Average h rows of 4 interpolated samples into dst.
 *
 * Every destination sample is replaced by avg2() of its current value and
 * the chroma_mc() result for the same column.
 *
 * @param dst    destination, read and written, advanced by stride per row
 * @param src    source; each row reads src[0..4] and src[stride..stride + 4]
 * @param stride distance in bytes between rows, shared by dst and src
 * @param h      number of rows to process
 * @param x      horizontal weight selector, asserted to be in 0..7
 * @param y      vertical weight selector, asserted to be in 0..7
 */
static void avg_no_rnd_vc1_chroma_mc4_c(uint8_t *dst /* align 8 */,
                                        uint8_t *src /* align 1 */,
                                        ptrdiff_t stride, int h, int x, int y)
{
    /* Weights of the four neighbours; A + B + C + D == 64. */
    const int A = (8 - x) * (8 - y);
    const int B = (x)     * (8 - y);
    const int C = (8 - x) * (y);
    const int D = (x)     * (y);
    int row, col;

    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);

    for (row = 0; row < h; row++, dst += stride, src += stride)
        for (col = 0; col < 4; col++)
            dst[col] = avg2(dst[col], chroma_mc(col));
}
884 
885 #if CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER
886 
/**
 * Resample a row of samples by linear interpolation.
 *
 * The read position is a 16.16 fixed-point number: its upper bits select
 * the sample src[pos >> 16], its low 16 bits are the weight given to the
 * following sample src[(pos >> 16) + 1].
 *
 * @param dst     output, receives count samples
 * @param src     input row
 * @param offset  fixed-point position of the first output sample
 * @param advance fixed-point step added to the position after each output
 * @param count   number of samples to write
 */
static void sprite_h_c(uint8_t *dst, const uint8_t *src, int offset,
                       int advance, int count)
{
    for (; count != 0; count--, offset += advance) {
        const uint8_t *pos  = src + (offset >> 16);
        const int      frac = offset & 0xFFFF;
        const int      cur  = pos[0];
        const int      next = pos[1];

        *dst++ = cur + (((next - cur) * frac) >> 16);
    }
}
897 
sprite_v_template(uint8_t * dst,const uint8_t * src1a,const uint8_t * src1b,int offset1,int two_sprites,const uint8_t * src2a,const uint8_t * src2b,int offset2,int alpha,int scaled,int width)898 static av_always_inline void sprite_v_template(uint8_t *dst,
899                                                const uint8_t *src1a,
900                                                const uint8_t *src1b,
901                                                int offset1,
902                                                int two_sprites,
903                                                const uint8_t *src2a,
904                                                const uint8_t *src2b,
905                                                int offset2,
906                                                int alpha, int scaled,
907                                                int width)
908 {
909     int a1, b1, a2, b2;
910     while (width--) {
911         a1 = *src1a++;
912         if (scaled) {
913             b1 = *src1b++;
914             a1 = a1 + ((b1 - a1) * offset1 >> 16);
915         }
916         if (two_sprites) {
917             a2 = *src2a++;
918             if (scaled > 1) {
919                 b2 = *src2b++;
920                 a2 = a2 + ((b2 - a2) * offset2 >> 16);
921             }
922             a1 = a1 + ((a2 - a1) * alpha >> 16);
923         }
924         *dst++ = a1;
925     }
926 }
927 
/* One sprite, blended between src1a and src1b with weight offset. */
static void sprite_v_single_c(uint8_t *dst, const uint8_t *src1a,
                              const uint8_t *src1b,
                              int offset, int width)
{
    sprite_v_template(dst,
                      src1a, src1b, offset,
                      0,             /* two_sprites */
                      NULL, NULL, 0, /* src2a, src2b, offset2: unused */
                      0,             /* alpha: unused */
                      1,             /* scaled */
                      width);
}
934 
/* Two sprites mixed with weight alpha; neither source is interpolated. */
static void sprite_v_double_noscale_c(uint8_t *dst, const uint8_t *src1a,
                                      const uint8_t *src2a,
                                      int alpha, int width)
{
    sprite_v_template(dst,
                      src1a, NULL, 0, /* src1b, offset1: unused */
                      1,              /* two_sprites */
                      src2a, NULL, 0, /* src2b, offset2: unused */
                      alpha,
                      0,              /* scaled */
                      width);
}
941 
/*
 * Two sprites mixed with weight alpha; only the first one is interpolated
 * (between src1a and src1b with weight offset1).
 */
static void sprite_v_double_onescale_c(uint8_t *dst,
                                       const uint8_t *src1a,
                                       const uint8_t *src1b,
                                       int offset1,
                                       const uint8_t *src2a,
                                       int alpha, int width)
{
    sprite_v_template(dst,
                      src1a, src1b, offset1,
                      1,              /* two_sprites */
                      src2a, NULL, 0, /* src2b, offset2: unused */
                      alpha,
                      1,              /* scaled */
                      width);
}
952 
/*
 * Two sprites mixed with weight alpha; both are interpolated, the first
 * between src1a and src1b (weight offset1), the second between src2a and
 * src2b (weight offset2).
 */
static void sprite_v_double_twoscale_c(uint8_t *dst,
                                       const uint8_t *src1a,
                                       const uint8_t *src1b,
                                       int offset1,
                                       const uint8_t *src2a,
                                       const uint8_t *src2b,
                                       int offset2,
                                       int alpha,
                                       int width)
{
    sprite_v_template(dst,
                      src1a, src1b, offset1,
                      1,              /* two_sprites */
                      src2a, src2b, offset2,
                      alpha,
                      2,              /* scaled */
                      width);
}
966 
967 #endif /* CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER */
/*
 * Register the four wrappers generated for mode pair (X, Y) in the tables
 * of the VC1DSPContext pointed to by the local variable dsp.
 *
 * Table row 0 receives the _16 variants, row 1 the plain ones; the column
 * is X + 4 * Y.  X and Y are pasted into function names, so they must be
 * plain digits.
 *
 * The assignments are wrapped in do { } while (0) so that the macro acts as
 * exactly one statement, e.g. as the body of a braceless if, and still
 * needs the trailing semicolon at the call site.
 */
#define FN_ASSIGN(X, Y)                                                        \
    do {                                                                       \
        dsp->put_vc1_mspel_pixels_tab[1][(X) + 4 * (Y)] =                      \
            put_vc1_mspel_mc##X##Y##_c;                                        \
        dsp->put_vc1_mspel_pixels_tab[0][(X) + 4 * (Y)] =                      \
            put_vc1_mspel_mc##X##Y##_16_c;                                     \
        dsp->avg_vc1_mspel_pixels_tab[1][(X) + 4 * (Y)] =                      \
            avg_vc1_mspel_mc##X##Y##_c;                                        \
        dsp->avg_vc1_mspel_pixels_tab[0][(X) + 4 * (Y)] =                      \
            avg_vc1_mspel_mc##X##Y##_16_c;                                     \
    } while (0)
973 
ff_vc1dsp_init(VC1DSPContext * dsp)974 av_cold void ff_vc1dsp_init(VC1DSPContext *dsp)
975 {
976     dsp->vc1_inv_trans_8x8    = vc1_inv_trans_8x8_c;
977     dsp->vc1_inv_trans_4x8    = vc1_inv_trans_4x8_c;
978     dsp->vc1_inv_trans_8x4    = vc1_inv_trans_8x4_c;
979     dsp->vc1_inv_trans_4x4    = vc1_inv_trans_4x4_c;
980     dsp->vc1_inv_trans_8x8_dc = vc1_inv_trans_8x8_dc_c;
981     dsp->vc1_inv_trans_4x8_dc = vc1_inv_trans_4x8_dc_c;
982     dsp->vc1_inv_trans_8x4_dc = vc1_inv_trans_8x4_dc_c;
983     dsp->vc1_inv_trans_4x4_dc = vc1_inv_trans_4x4_dc_c;
984 
985     dsp->vc1_h_overlap        = vc1_h_overlap_c;
986     dsp->vc1_v_overlap        = vc1_v_overlap_c;
987     dsp->vc1_h_s_overlap      = vc1_h_s_overlap_c;
988     dsp->vc1_v_s_overlap      = vc1_v_s_overlap_c;
989 
990     dsp->vc1_v_loop_filter4   = vc1_v_loop_filter4_c;
991     dsp->vc1_h_loop_filter4   = vc1_h_loop_filter4_c;
992     dsp->vc1_v_loop_filter8   = vc1_v_loop_filter8_c;
993     dsp->vc1_h_loop_filter8   = vc1_h_loop_filter8_c;
994     dsp->vc1_v_loop_filter16  = vc1_v_loop_filter16_c;
995     dsp->vc1_h_loop_filter16  = vc1_h_loop_filter16_c;
996 
997     dsp->put_vc1_mspel_pixels_tab[0][0] = put_pixels16x16_c;
998     dsp->avg_vc1_mspel_pixels_tab[0][0] = avg_pixels16x16_c;
999     dsp->put_vc1_mspel_pixels_tab[1][0] = put_pixels8x8_c;
1000     dsp->avg_vc1_mspel_pixels_tab[1][0] = avg_pixels8x8_c;
1001     FN_ASSIGN(0, 1);
1002     FN_ASSIGN(0, 2);
1003     FN_ASSIGN(0, 3);
1004 
1005     FN_ASSIGN(1, 0);
1006     FN_ASSIGN(1, 1);
1007     FN_ASSIGN(1, 2);
1008     FN_ASSIGN(1, 3);
1009 
1010     FN_ASSIGN(2, 0);
1011     FN_ASSIGN(2, 1);
1012     FN_ASSIGN(2, 2);
1013     FN_ASSIGN(2, 3);
1014 
1015     FN_ASSIGN(3, 0);
1016     FN_ASSIGN(3, 1);
1017     FN_ASSIGN(3, 2);
1018     FN_ASSIGN(3, 3);
1019 
1020     dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = put_no_rnd_vc1_chroma_mc8_c;
1021     dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = avg_no_rnd_vc1_chroma_mc8_c;
1022     dsp->put_no_rnd_vc1_chroma_pixels_tab[1] = put_no_rnd_vc1_chroma_mc4_c;
1023     dsp->avg_no_rnd_vc1_chroma_pixels_tab[1] = avg_no_rnd_vc1_chroma_mc4_c;
1024 
1025 #if CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER
1026     dsp->sprite_h                 = sprite_h_c;
1027     dsp->sprite_v_single          = sprite_v_single_c;
1028     dsp->sprite_v_double_noscale  = sprite_v_double_noscale_c;
1029     dsp->sprite_v_double_onescale = sprite_v_double_onescale_c;
1030     dsp->sprite_v_double_twoscale = sprite_v_double_twoscale_c;
1031 #endif /* CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER */
1032 
1033     dsp->startcode_find_candidate = ff_startcode_find_candidate_c;
1034     dsp->vc1_unescape_buffer      = vc1_unescape_buffer;
1035 
1036 #if ARCH_AARCH64
1037     ff_vc1dsp_init_aarch64(dsp);
1038 #elif ARCH_ARM
1039     ff_vc1dsp_init_arm(dsp);
1040 #elif ARCH_PPC
1041     ff_vc1dsp_init_ppc(dsp);
1042 #elif ARCH_X86
1043     ff_vc1dsp_init_x86(dsp);
1044 #elif ARCH_MIPS
1045     ff_vc1dsp_init_mips(dsp);
1046 #elif ARCH_LOONGARCH
1047     ff_vc1dsp_init_loongarch(dsp);
1048 #endif
1049 }
1050