/*
 *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./vp8_rtcd.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/asmdefs_mmi.h"

/*
 * TRANSPOSE_4H: inline-asm fragment that transposes, in place, a 4x4 matrix
 * of 16-bit halfwords held one row per register in %[ftmp1]..%[ftmp4].
 *
 * The enclosing asm statement must declare these named operands:
 *   ftmp1..ftmp4   rows on entry, transposed rows on exit
 *   ftmp0          zeroed by this fragment (still zero on exit)
 *   ftmp5..ftmp10  scratch, clobbered
 *   tmp0           general-purpose scratch, left holding 0x93
 *
 * How it works:
 *   1. Each row half is unpacked against zero, giving [x0, 0, x1, 0].
 *   2. The same half of the neighbouring row is unpacked the same way and
 *      then shuffled with selector 0x93, which moves every halfword up one
 *      lane, giving [0, y0, 0, y1].
 *   3. OR-ing the two produces the interleaved pair [x0, y0, x1, y1]
 *      (ftmp5/ftmp6 for rows 1+2, ftmp7/ftmp8 for rows 3+4).
 *   4. punpcklwd/punpckhwd combine the 32-bit pairs into the final columns.
 */
#define TRANSPOSE_4H \
  "xor           %[ftmp0],    %[ftmp0],    %[ftmp0]          \n\t" \
  MMI_LI(%[tmp0], 0x93)                                            \
  "mtc1          %[tmp0],     %[ftmp10]                      \n\t" \
  "punpcklhw     %[ftmp5],    %[ftmp1],    %[ftmp0]          \n\t" \
  "punpcklhw     %[ftmp9],    %[ftmp2],    %[ftmp0]          \n\t" \
  "pshufh        %[ftmp9],    %[ftmp9],    %[ftmp10]         \n\t" \
  "or            %[ftmp5],    %[ftmp5],    %[ftmp9]          \n\t" \
  "punpckhhw     %[ftmp6],    %[ftmp1],    %[ftmp0]          \n\t" \
  "punpckhhw     %[ftmp9],    %[ftmp2],    %[ftmp0]          \n\t" \
  "pshufh        %[ftmp9],    %[ftmp9],    %[ftmp10]         \n\t" \
  "or            %[ftmp6],    %[ftmp6],    %[ftmp9]          \n\t" \
  "punpcklhw     %[ftmp7],    %[ftmp3],    %[ftmp0]          \n\t" \
  "punpcklhw     %[ftmp9],    %[ftmp4],    %[ftmp0]          \n\t" \
  "pshufh        %[ftmp9],    %[ftmp9],    %[ftmp10]         \n\t" \
  "or            %[ftmp7],    %[ftmp7],    %[ftmp9]          \n\t" \
  "punpckhhw     %[ftmp8],    %[ftmp3],    %[ftmp0]          \n\t" \
  "punpckhhw     %[ftmp9],    %[ftmp4],    %[ftmp0]          \n\t" \
  "pshufh        %[ftmp9],    %[ftmp9],    %[ftmp10]         \n\t" \
  "or            %[ftmp8],    %[ftmp8],    %[ftmp9]          \n\t" \
  "punpcklwd     %[ftmp1],    %[ftmp5],    %[ftmp7]          \n\t" \
  "punpckhwd     %[ftmp2],    %[ftmp5],    %[ftmp7]          \n\t" \
  "punpcklwd     %[ftmp3],    %[ftmp6],    %[ftmp8]          \n\t" \
  "punpckhwd     %[ftmp4],    %[ftmp6],    %[ftmp8]          \n\t"

vp8_short_idct4x4llm_mmi(int16_t * input,unsigned char * pred_ptr,int pred_stride,unsigned char * dst_ptr,int dst_stride)40 void vp8_short_idct4x4llm_mmi(int16_t *input, unsigned char *pred_ptr,
41                               int pred_stride, unsigned char *dst_ptr,
42                               int dst_stride) {
43   double ftmp[12];
44   uint32_t tmp[0];
45   DECLARE_ALIGNED(8, const uint64_t, ff_ph_04) = { 0x0004000400040004ULL };
46   DECLARE_ALIGNED(8, const uint64_t, ff_ph_4e7b) = { 0x4e7b4e7b4e7b4e7bULL };
47   DECLARE_ALIGNED(8, const uint64_t, ff_ph_22a3) = { 0x22a322a322a322a3ULL };
48 
49   __asm__ volatile (
50     MMI_LI(%[tmp0], 0x02)
51     "mtc1       %[tmp0],    %[ftmp11]                           \n\t"
52     "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
53 
54     "gsldlc1    %[ftmp1],   0x07(%[ip])                         \n\t"
55     "gsldrc1    %[ftmp1],   0x00(%[ip])                         \n\t"
56     "gsldlc1    %[ftmp2],   0x0f(%[ip])                         \n\t"
57     "gsldrc1    %[ftmp2],   0x08(%[ip])                         \n\t"
58     "gsldlc1    %[ftmp3],   0x17(%[ip])                         \n\t"
59     "gsldrc1    %[ftmp3],   0x10(%[ip])                         \n\t"
60     "gsldlc1    %[ftmp4],   0x1f(%[ip])                         \n\t"
61     "gsldrc1    %[ftmp4],   0x18(%[ip])                         \n\t"
62 
63     // ip[0...3] + ip[8...11]
64     "paddh      %[ftmp5],   %[ftmp1],       %[ftmp3]            \n\t"
65     // ip[0...3] - ip[8...11]
66     "psubh      %[ftmp6],   %[ftmp1],       %[ftmp3]            \n\t"
67     // (ip[12...15] * sinpi8sqrt2) >> 16
68     "psllh      %[ftmp9],   %[ftmp4],       %[ftmp11]           \n\t"
69     "pmulhh     %[ftmp7],   %[ftmp9],       %[ff_ph_22a3]       \n\t"
70     // (ip[ 4... 7] * sinpi8sqrt2) >> 16
71     "psllh      %[ftmp9],   %[ftmp2],       %[ftmp11]           \n\t"
72     "pmulhh     %[ftmp8],   %[ftmp9],       %[ff_ph_22a3]       \n\t"
73     // ip[ 4... 7] + ((ip[ 4... 7] * cospi8sqrt2minus1) >> 16)
74     "pmulhh     %[ftmp9],   %[ftmp2],       %[ff_ph_4e7b]       \n\t"
75     "paddh      %[ftmp9],   %[ftmp9],       %[ftmp2]            \n\t"
76     // ip[12...15] + ((ip[12...15] * cospi8sqrt2minus1) >> 16)
77     "pmulhh     %[ftmp10],  %[ftmp4],       %[ff_ph_4e7b]       \n\t"
78     "paddh      %[ftmp10],  %[ftmp10],      %[ftmp4]            \n\t"
79 
80     "paddh      %[ftmp1],   %[ftmp5],       %[ftmp7]            \n\t"
81     "paddh      %[ftmp1],   %[ftmp1],       %[ftmp9]            \n\t"
82     "paddh      %[ftmp2],   %[ftmp6],       %[ftmp8]            \n\t"
83     "psubh      %[ftmp2],   %[ftmp2],       %[ftmp10]           \n\t"
84     "psubh      %[ftmp3],   %[ftmp6],       %[ftmp8]            \n\t"
85     "paddh      %[ftmp3],   %[ftmp3],       %[ftmp10]           \n\t"
86     "psubh      %[ftmp4],   %[ftmp5],       %[ftmp7]            \n\t"
87     "psubh      %[ftmp4],   %[ftmp4],       %[ftmp9]            \n\t"
88 
89     TRANSPOSE_4H
90     // a
91     "paddh      %[ftmp5],   %[ftmp1],       %[ftmp3]            \n\t"
92     // b
93     "psubh      %[ftmp6],   %[ftmp1],       %[ftmp3]            \n\t"
94     // c
95     "psllh      %[ftmp9],   %[ftmp2],       %[ftmp11]           \n\t"
96     "pmulhh     %[ftmp9],   %[ftmp9],       %[ff_ph_22a3]       \n\t"
97     "psubh      %[ftmp7],   %[ftmp9],       %[ftmp4]            \n\t"
98     "pmulhh     %[ftmp10],  %[ftmp4],       %[ff_ph_4e7b]       \n\t"
99     "psubh      %[ftmp7],   %[ftmp7],       %[ftmp10]           \n\t"
100     // d
101     "psllh      %[ftmp9],   %[ftmp4],       %[ftmp11]           \n\t"
102     "pmulhh     %[ftmp9],   %[ftmp9],       %[ff_ph_22a3]       \n\t"
103     "paddh      %[ftmp8],   %[ftmp9],       %[ftmp2]            \n\t"
104     "pmulhh     %[ftmp10],  %[ftmp2],       %[ff_ph_4e7b]       \n\t"
105     "paddh      %[ftmp8],   %[ftmp8],       %[ftmp10]           \n\t"
106 
107     MMI_LI(%[tmp0], 0x03)
108     "mtc1       %[tmp0],    %[ftmp11]                           \n\t"
109     // a + d
110     "paddh      %[ftmp1],   %[ftmp5],       %[ftmp8]            \n\t"
111     "paddh      %[ftmp1],   %[ftmp1],       %[ff_ph_04]         \n\t"
112     "psrah      %[ftmp1],   %[ftmp1],       %[ftmp11]           \n\t"
113     // b + c
114     "paddh      %[ftmp2],   %[ftmp6],       %[ftmp7]            \n\t"
115     "paddh      %[ftmp2],   %[ftmp2],       %[ff_ph_04]         \n\t"
116     "psrah      %[ftmp2],   %[ftmp2],       %[ftmp11]           \n\t"
117     // b - c
118     "psubh      %[ftmp3],   %[ftmp6],       %[ftmp7]            \n\t"
119     "paddh      %[ftmp3],   %[ftmp3],       %[ff_ph_04]         \n\t"
120     "psrah      %[ftmp3],   %[ftmp3],       %[ftmp11]           \n\t"
121     // a - d
122     "psubh      %[ftmp4],   %[ftmp5],       %[ftmp8]            \n\t"
123     "paddh      %[ftmp4],   %[ftmp4],       %[ff_ph_04]         \n\t"
124     "psrah      %[ftmp4],   %[ftmp4],       %[ftmp11]           \n\t"
125 
126     TRANSPOSE_4H
127 #if _MIPS_SIM == _ABIO32
128     "ulw        %[tmp0],    0x00(%[pred_prt])                   \n\t"
129     "mtc1       %[tmp0],    %[ftmp5]                            \n\t"
130 #else
131     "gslwlc1    %[ftmp5],   0x03(%[pred_ptr])                   \n\t"
132     "gslwrc1    %[ftmp5],   0x00(%[pred_ptr])                   \n\t"
133 #endif
134     "punpcklbh  %[ftmp5],   %[ftmp5],       %[ftmp0]            \n\t"
135     "paddh      %[ftmp1],   %[ftmp1],       %[ftmp5]            \n\t"
136     "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
137     "gsswlc1    %[ftmp1],   0x03(%[dst_ptr])                    \n\t"
138     "gsswrc1    %[ftmp1],   0x00(%[dst_ptr])                    \n\t"
139     MMI_ADDU(%[pred_ptr], %[pred_ptr], %[pred_stride])
140     MMI_ADDU(%[dst_ptr], %[dst_ptr], %[dst_stride])
141 
142 #if _MIPS_SIM == _ABIO32
143     "ulw        %[tmp0],    0x00(%[pred_prt])                   \n\t"
144     "mtc1       %[tmp0],    %[ftmp6]                            \n\t"
145 #else
146     "gslwlc1    %[ftmp6],   0x03(%[pred_ptr])                   \n\t"
147     "gslwrc1    %[ftmp6],   0x00(%[pred_ptr])                   \n\t"
148 #endif
149     "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp0]            \n\t"
150     "paddh      %[ftmp2],   %[ftmp2],       %[ftmp6]            \n\t"
151     "packushb   %[ftmp2],   %[ftmp2],       %[ftmp0]            \n\t"
152     "gsswlc1    %[ftmp2],   0x03(%[dst_ptr])                    \n\t"
153     "gsswrc1    %[ftmp2],   0x00(%[dst_ptr])                    \n\t"
154     MMI_ADDU(%[pred_ptr], %[pred_ptr], %[pred_stride])
155     MMI_ADDU(%[dst_ptr], %[dst_ptr], %[dst_stride])
156 
157 #if _MIPS_SIM == _ABIO32
158     "ulw        %[tmp0],    0x00(%[pred_prt])                   \n\t"
159     "mtc1       %[tmp0],    %[ftmp7]                            \n\t"
160 #else
161     "gslwlc1    %[ftmp7],   0x03(%[pred_ptr])                   \n\t"
162     "gslwrc1    %[ftmp7],   0x00(%[pred_ptr])                   \n\t"
163 #endif
164     "punpcklbh  %[ftmp7],   %[ftmp7],       %[ftmp0]            \n\t"
165     "paddh      %[ftmp3],   %[ftmp3],       %[ftmp7]            \n\t"
166     "packushb   %[ftmp3],   %[ftmp3],       %[ftmp0]            \n\t"
167     "gsswlc1    %[ftmp3],   0x03(%[dst_ptr])                    \n\t"
168     "gsswrc1    %[ftmp3],   0x00(%[dst_ptr])                    \n\t"
169     MMI_ADDU(%[pred_ptr], %[pred_ptr], %[pred_stride])
170     MMI_ADDU(%[dst_ptr], %[dst_ptr], %[dst_stride])
171 
172 #if _MIPS_SIM == _ABIO32
173     "ulw        %[tmp0],    0x00(%[pred_prt])                   \n\t"
174     "mtc1       %[tmp0],    %[ftmp8]                            \n\t"
175 #else
176     "gslwlc1    %[ftmp8],   0x03(%[pred_ptr])                   \n\t"
177     "gslwrc1    %[ftmp8],   0x00(%[pred_ptr])                   \n\t"
178 #endif
179     "punpcklbh  %[ftmp8],   %[ftmp8],       %[ftmp0]            \n\t"
180     "paddh      %[ftmp4],   %[ftmp4],       %[ftmp8]            \n\t"
181     "packushb   %[ftmp4],   %[ftmp4],       %[ftmp0]            \n\t"
182     "gsswlc1    %[ftmp4],   0x03(%[dst_ptr])                    \n\t"
183     "gsswrc1    %[ftmp4],   0x00(%[dst_ptr])                    \n\t"
184     : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]),
185       [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
186       [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]),
187       [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]),
188       [ftmp11]"=&f"(ftmp[11]), [tmp0]"=&r"(tmp[0]),
189       [pred_ptr]"+&r"(pred_ptr), [dst_ptr]"+&r"(dst_ptr)
190     : [ip]"r"(input), [ff_ph_22a3]"f"(ff_ph_22a3),
191       [ff_ph_4e7b]"f"(ff_ph_4e7b), [ff_ph_04]"f"(ff_ph_04),
192       [pred_stride]"r"((mips_reg)pred_stride),
193       [dst_stride]"r"((mips_reg)dst_stride)
194     : "memory"
195   );
196 }
197 
vp8_dc_only_idct_add_mmi(int16_t input_dc,unsigned char * pred_ptr,int pred_stride,unsigned char * dst_ptr,int dst_stride)198 void vp8_dc_only_idct_add_mmi(int16_t input_dc, unsigned char *pred_ptr,
199                               int pred_stride, unsigned char *dst_ptr,
200                               int dst_stride) {
201   int a1 = ((input_dc + 4) >> 3);
202   double ftmp[5];
203   int low32;
204 
205   __asm__ volatile (
206     "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]        \n\t"
207     "pshufh     %[a1],      %[a1],          %[ftmp0]        \n\t"
208     "ulw        %[low32],   0x00(%[pred_ptr])               \n\t"
209     "mtc1       %[low32],   %[ftmp1]                        \n\t"
210     "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]        \n\t"
211     "paddsh     %[ftmp2],   %[ftmp2],       %[a1]           \n\t"
212     "packushb   %[ftmp1],   %[ftmp2],       %[ftmp0]        \n\t"
213     "gsswlc1    %[ftmp1],   0x03(%[dst_ptr])                \n\t"
214     "gsswrc1    %[ftmp1],   0x00(%[dst_ptr])                \n\t"
215 
216     MMI_ADDU(%[pred_ptr], %[pred_ptr], %[pred_stride])
217     MMI_ADDU(%[dst_ptr], %[dst_ptr], %[dst_stride])
218     "ulw        %[low32],   0x00(%[pred_ptr])               \n\t"
219     "mtc1       %[low32],   %[ftmp1]                        \n\t"
220     "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]        \n\t"
221     "paddsh     %[ftmp2],   %[ftmp2],       %[a1]           \n\t"
222     "packushb   %[ftmp1],   %[ftmp2],       %[ftmp0]        \n\t"
223     "gsswlc1    %[ftmp1],   0x03(%[dst_ptr])                \n\t"
224     "gsswrc1    %[ftmp1],   0x00(%[dst_ptr])                \n\t"
225 
226     MMI_ADDU(%[pred_ptr], %[pred_ptr], %[pred_stride])
227     MMI_ADDU(%[dst_ptr], %[dst_ptr], %[dst_stride])
228     "ulw        %[low32],   0x00(%[pred_ptr])               \n\t"
229     "mtc1       %[low32],   %[ftmp1]                        \n\t"
230     "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]        \n\t"
231     "paddsh     %[ftmp2],   %[ftmp2],       %[a1]           \n\t"
232     "packushb   %[ftmp1],   %[ftmp2],       %[ftmp0]        \n\t"
233     "gsswlc1    %[ftmp1],   0x03(%[dst_ptr])                \n\t"
234     "gsswrc1    %[ftmp1],   0x00(%[dst_ptr])                \n\t"
235 
236     MMI_ADDU(%[pred_ptr], %[pred_ptr], %[pred_stride])
237     MMI_ADDU(%[dst_ptr], %[dst_ptr], %[dst_stride])
238     "ulw        %[low32],   0x00(%[pred_ptr])               \n\t"
239     "mtc1       %[low32],   %[ftmp1]                        \n\t"
240     "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]        \n\t"
241     "paddsh     %[ftmp2],   %[ftmp2],       %[a1]           \n\t"
242     "packushb   %[ftmp1],   %[ftmp2],       %[ftmp0]        \n\t"
243     "gsswlc1    %[ftmp1],   0x03(%[dst_ptr])                \n\t"
244     "gsswrc1    %[ftmp1],   0x00(%[dst_ptr])                \n\t"
245     : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]),
246       [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [low32]"=&r"(low32),
247       [dst_ptr]"+&r"(dst_ptr), [pred_ptr]"+&r"(pred_ptr)
248     : [dst_stride]"r"((mips_reg)dst_stride),
249       [pred_stride]"r"((mips_reg)pred_stride), [a1]"f"(a1)
250     : "memory"
251   );
252 }
253 
vp8_short_inv_walsh4x4_mmi(int16_t * input,int16_t * mb_dqcoeff)254 void vp8_short_inv_walsh4x4_mmi(int16_t *input, int16_t *mb_dqcoeff) {
255   int i;
256   int16_t output[16];
257   double ftmp[12];
258   uint32_t tmp[1];
259   DECLARE_ALIGNED(8, const uint64_t, ff_ph_03) = { 0x0003000300030003ULL };
260 
261   __asm__ volatile (
262     MMI_LI(%[tmp0], 0x03)
263     "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
264     "mtc1       %[tmp0],    %[ftmp11]                           \n\t"
265     "gsldlc1    %[ftmp1],   0x07(%[ip])                         \n\t"
266     "gsldrc1    %[ftmp1],   0x00(%[ip])                         \n\t"
267     "gsldlc1    %[ftmp2],   0x0f(%[ip])                         \n\t"
268     "gsldrc1    %[ftmp2],   0x08(%[ip])                         \n\t"
269     "gsldlc1    %[ftmp3],   0x17(%[ip])                         \n\t"
270     "gsldrc1    %[ftmp3],   0x10(%[ip])                         \n\t"
271     "gsldlc1    %[ftmp4],   0x1f(%[ip])                         \n\t"
272     "gsldrc1    %[ftmp4],   0x18(%[ip])                         \n\t"
273     "paddh      %[ftmp5],   %[ftmp1],       %[ftmp2]            \n\t"
274     "psubh      %[ftmp6],   %[ftmp1],       %[ftmp2]            \n\t"
275     "paddh      %[ftmp7],   %[ftmp3],       %[ftmp4]            \n\t"
276     "psubh      %[ftmp8],   %[ftmp3],       %[ftmp4]            \n\t"
277 
278     "paddh      %[ftmp1],   %[ftmp5],       %[ftmp7]            \n\t"
279     "psubh      %[ftmp2],   %[ftmp5],       %[ftmp7]            \n\t"
280     "psubh      %[ftmp3],   %[ftmp6],       %[ftmp8]            \n\t"
281     "paddh      %[ftmp4],   %[ftmp6],       %[ftmp8]            \n\t"
282 
283     TRANSPOSE_4H
284     // a
285     "paddh      %[ftmp5],   %[ftmp1],       %[ftmp4]            \n\t"
286     // d
287     "psubh      %[ftmp6],   %[ftmp1],       %[ftmp4]            \n\t"
288     // b
289     "paddh      %[ftmp7],   %[ftmp2],       %[ftmp3]            \n\t"
290     // c
291     "psubh      %[ftmp8],   %[ftmp2],       %[ftmp3]            \n\t"
292 
293     "paddh      %[ftmp1],   %[ftmp5],       %[ftmp7]            \n\t"
294     "paddh      %[ftmp2],   %[ftmp6],       %[ftmp8]            \n\t"
295     "psubh      %[ftmp3],   %[ftmp5],       %[ftmp7]            \n\t"
296     "psubh      %[ftmp4],   %[ftmp6],       %[ftmp8]            \n\t"
297 
298     "paddh      %[ftmp1],   %[ftmp1],       %[ff_ph_03]         \n\t"
299     "psrah      %[ftmp1],   %[ftmp1],       %[ftmp11]           \n\t"
300     "paddh      %[ftmp2],   %[ftmp2],       %[ff_ph_03]         \n\t"
301     "psrah      %[ftmp2],   %[ftmp2],       %[ftmp11]           \n\t"
302     "paddh      %[ftmp3],   %[ftmp3],       %[ff_ph_03]         \n\t"
303     "psrah      %[ftmp3],   %[ftmp3],       %[ftmp11]           \n\t"
304     "paddh      %[ftmp4],   %[ftmp4],       %[ff_ph_03]         \n\t"
305     "psrah      %[ftmp4],   %[ftmp4],       %[ftmp11]           \n\t"
306 
307     TRANSPOSE_4H
308     "gssdlc1    %[ftmp1],   0x07(%[op])                         \n\t"
309     "gssdrc1    %[ftmp1],   0x00(%[op])                         \n\t"
310     "gssdlc1    %[ftmp2],   0x0f(%[op])                         \n\t"
311     "gssdrc1    %[ftmp2],   0x08(%[op])                         \n\t"
312     "gssdlc1    %[ftmp3],   0x17(%[op])                         \n\t"
313     "gssdrc1    %[ftmp3],   0x10(%[op])                         \n\t"
314     "gssdlc1    %[ftmp4],   0x1f(%[op])                         \n\t"
315     "gssdrc1    %[ftmp4],   0x18(%[op])                         \n\t"
316     : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]),
317       [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
318       [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]),
319       [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]),
320       [ftmp11]"=&f"(ftmp[11]), [tmp0]"=&r"(tmp[0])
321     : [ip]"r"(input), [op]"r"(output), [ff_ph_03]"f"(ff_ph_03)
322     : "memory"
323   );
324 
325   for (i = 0; i < 16; i++) {
326     mb_dqcoeff[i * 16] = output[i];
327   }
328 }
329