/*
 * Loongson SIMD optimized h264dsp
 *
 * Copyright (c) 2015 Loongson Technology Corporation Limited
 * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
 *                    Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn>
 *                    Heiher <r@hev.cc>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
25
26 #include "libavcodec/bit_depth_template.c"
27 #include "h264dsp_mips.h"
28 #include "libavutil/mips/mmiutils.h"
29 #include "libavutil/mem_internal.h"
30
ff_h264_add_pixels4_8_mmi(uint8_t * dst,int16_t * src,int stride)31 void ff_h264_add_pixels4_8_mmi(uint8_t *dst, int16_t *src, int stride)
32 {
33 double ftmp[9];
34 DECLARE_VAR_LOW32;
35
36 __asm__ volatile (
37 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
38 MMI_LDC1(%[ftmp1], %[src], 0x00)
39 MMI_LDC1(%[ftmp2], %[src], 0x08)
40 MMI_LDC1(%[ftmp3], %[src], 0x10)
41 MMI_LDC1(%[ftmp4], %[src], 0x18)
42 /* memset(src, 0, 32); */
43 MMI_SQC1(%[ftmp0], %[ftmp0], %[src], 0x00)
44 MMI_SQC1(%[ftmp0], %[ftmp0], %[src], 0x10)
45 MMI_ULWC1(%[ftmp5], %[dst0], 0x00)
46 MMI_ULWC1(%[ftmp6], %[dst1], 0x00)
47 MMI_ULWC1(%[ftmp7], %[dst2], 0x00)
48 MMI_ULWC1(%[ftmp8], %[dst3], 0x00)
49 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
50 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
51 "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
52 "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
53 "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
54 "paddh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
55 "paddh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
56 "paddh %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
57 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
58 "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
59 "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
60 "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
61 MMI_SWC1(%[ftmp1], %[dst0], 0x00)
62 MMI_SWC1(%[ftmp2], %[dst1], 0x00)
63 MMI_SWC1(%[ftmp3], %[dst2], 0x00)
64 MMI_SWC1(%[ftmp4], %[dst3], 0x00)
65 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
66 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
67 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
68 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
69 RESTRICT_ASM_LOW32
70 [ftmp8]"=&f"(ftmp[8])
71 : [dst0]"r"(dst), [dst1]"r"(dst+stride),
72 [dst2]"r"(dst+2*stride), [dst3]"r"(dst+3*stride),
73 [src]"r"(src)
74 : "memory"
75 );
76
77 }
78
ff_h264_idct_add_8_mmi(uint8_t * dst,int16_t * block,int stride)79 void ff_h264_idct_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
80 {
81 double ftmp[12];
82 uint64_t tmp[1];
83 DECLARE_VAR_LOW32;
84 DECLARE_VAR_ADDRT;
85
86 __asm__ volatile (
87 MMI_LDC1(%[ftmp0], %[block], 0x00)
88 MMI_LDC1(%[ftmp1], %[block], 0x08)
89 MMI_LDC1(%[ftmp2], %[block], 0x10)
90 MMI_LDC1(%[ftmp3], %[block], 0x18)
91 /* memset(block, 0, 32) */
92 "pxor %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
93 MMI_SQC1(%[ftmp4], %[ftmp4], %[block], 0x00)
94 MMI_SQC1(%[ftmp4], %[ftmp4], %[block], 0x10)
95 "dli %[tmp0], 0x01 \n\t"
96 "mtc1 %[tmp0], %[ftmp8] \n\t"
97 "dli %[tmp0], 0x06 \n\t"
98 "mtc1 %[tmp0], %[ftmp9] \n\t"
99 "psrah %[ftmp4], %[ftmp1], %[ftmp8] \n\t"
100 "psrah %[ftmp5], %[ftmp3], %[ftmp8] \n\t"
101 "psubh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
102 "paddh %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
103 "paddh %[ftmp10], %[ftmp2], %[ftmp0] \n\t"
104 "psubh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
105 "paddh %[ftmp11], %[ftmp5], %[ftmp10] \n\t"
106 "psubh %[ftmp2], %[ftmp10], %[ftmp5] \n\t"
107 "paddh %[ftmp10], %[ftmp4], %[ftmp0] \n\t"
108 "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
109 "punpckhhw %[ftmp1], %[ftmp11], %[ftmp10] \n\t"
110 "punpcklhw %[ftmp5], %[ftmp11], %[ftmp10] \n\t"
111 "punpckhhw %[ftmp4], %[ftmp0], %[ftmp2] \n\t"
112 "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
113 "punpckhwd %[ftmp2], %[ftmp5], %[ftmp0] \n\t"
114 "punpcklwd %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
115 "punpcklwd %[ftmp10], %[ftmp1], %[ftmp4] \n\t"
116 "punpckhwd %[ftmp0], %[ftmp1], %[ftmp4] \n\t"
117 "paddh %[ftmp5], %[ftmp5], %[ff_pw_32] \n\t"
118 "psrah %[ftmp4], %[ftmp2], %[ftmp8] \n\t"
119 "psrah %[ftmp3], %[ftmp0], %[ftmp8] \n\t"
120 "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
121 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
122 "paddh %[ftmp1], %[ftmp10], %[ftmp5] \n\t"
123 "psubh %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
124 "paddh %[ftmp10], %[ftmp3], %[ftmp1] \n\t"
125 "psubh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
126 "paddh %[ftmp11], %[ftmp4], %[ftmp5] \n\t"
127 "psubh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
128 MMI_ULWC1(%[ftmp2], %[dst], 0x00)
129 MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
130 "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
131 "psrah %[ftmp3], %[ftmp10], %[ftmp9] \n\t"
132 "psrah %[ftmp4], %[ftmp11], %[ftmp9] \n\t"
133 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
134 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
135 "paddh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
136 "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
137 "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
138 "packushb %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
139 MMI_SWC1(%[ftmp2], %[dst], 0x00)
140 MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
141 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
142 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
143 MMI_ULWC1(%[ftmp2], %[dst], 0x00)
144 "psrah %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
145 MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
146 "psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
147 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
148 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
149 "paddh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
150 "paddh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
151 "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
152 MMI_SWC1(%[ftmp2], %[dst], 0x00)
153 "packushb %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
154 MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
155 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
156 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
157 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
158 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
159 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
160 [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
161 RESTRICT_ASM_LOW32
162 RESTRICT_ASM_ADDRT
163 [tmp0]"=&r"(tmp[0])
164 : [dst]"r"(dst), [block]"r"(block),
165 [stride]"r"((mips_reg)stride), [ff_pw_32]"f"(ff_pw_32.f)
166 : "memory"
167 );
168
169 }
170
ff_h264_idct8_add_8_mmi(uint8_t * dst,int16_t * block,int stride)171 void ff_h264_idct8_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
172 {
173 double ftmp[16];
174 uint64_t tmp[7];
175 mips_reg addr[1];
176 DECLARE_VAR_LOW32;
177 DECLARE_VAR_ADDRT;
178
179 __asm__ volatile (
180 "lhu %[tmp0], 0x00(%[block]) \n\t"
181 PTR_ADDI "$sp, $sp, -0x20 \n\t"
182 PTR_ADDIU "%[tmp0], %[tmp0], 0x20 \n\t"
183 MMI_LDC1(%[ftmp1], %[block], 0x10)
184 "sh %[tmp0], 0x00(%[block]) \n\t"
185 MMI_LDC1(%[ftmp2], %[block], 0x20)
186 "dli %[tmp0], 0x01 \n\t"
187 MMI_LDC1(%[ftmp3], %[block], 0x30)
188 "mtc1 %[tmp0], %[ftmp8] \n\t"
189 MMI_LDC1(%[ftmp5], %[block], 0x50)
190 MMI_LDC1(%[ftmp6], %[block], 0x60)
191 MMI_LDC1(%[ftmp7], %[block], 0x70)
192 "mov.d %[ftmp0], %[ftmp1] \n\t"
193 "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
194 "psrah %[ftmp4], %[ftmp5], %[ftmp8] \n\t"
195 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
196 "paddh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
197 "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
198 "paddh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
199 "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
200 "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
201 "psubh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
202 "psubh %[ftmp5], %[ftmp5], %[ftmp3] \n\t"
203 "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
204 "paddh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
205 "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
206 "psrah %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
207 "psubh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
208 "dli %[tmp0], 0x02 \n\t"
209 "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
210 "mtc1 %[tmp0], %[ftmp9] \n\t"
211 "mov.d %[ftmp7], %[ftmp1] \n\t"
212 "psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
213 "psrah %[ftmp3], %[ftmp4], %[ftmp9] \n\t"
214 "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
215 "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
216 "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
217 "psrah %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
218 "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
219 "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
220 "mov.d %[ftmp5], %[ftmp6] \n\t"
221 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
222 "psrah %[ftmp4], %[ftmp2], %[ftmp8] \n\t"
223 "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
224 "psubh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
225 MMI_LDC1(%[ftmp2], %[block], 0x00)
226 MMI_LDC1(%[ftmp5], %[block], 0x40)
227 "paddh %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
228 "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
229 "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
230 "psubh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
231 "paddh %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
232 "paddh %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
233 "psubh %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
234 "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
235 "paddh %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
236 "psubh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
237 "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
238 "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
239 "psubh %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
240 "paddh %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
241 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
242 "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
243 "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
244 "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
245 "psubh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
246 "paddh %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
247 MMI_SDC1(%[ftmp6], %[block], 0x00)
248 "psubh %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
249 "punpckhhw %[ftmp6], %[ftmp7], %[ftmp0] \n\t"
250 "punpcklhw %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
251 "punpckhhw %[ftmp0], %[ftmp3], %[ftmp1] \n\t"
252 "punpcklhw %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
253 "punpckhwd %[ftmp1], %[ftmp7], %[ftmp3] \n\t"
254 "punpcklwd %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
255 "punpckhwd %[ftmp3], %[ftmp6], %[ftmp0] \n\t"
256 "punpcklwd %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
257 MMI_LDC1(%[ftmp0], %[block], 0x00)
258 MMI_SDC1(%[ftmp7], $sp, 0x00)
259 MMI_SDC1(%[ftmp1], $sp, 0x10)
260 "dmfc1 %[tmp1], %[ftmp6] \n\t"
261 "dmfc1 %[tmp3], %[ftmp3] \n\t"
262 "punpckhhw %[ftmp3], %[ftmp5], %[ftmp2] \n\t"
263 "punpcklhw %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
264 "punpckhhw %[ftmp2], %[ftmp4], %[ftmp0] \n\t"
265 "punpcklhw %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
266 "punpckhwd %[ftmp0], %[ftmp5], %[ftmp4] \n\t"
267 "punpcklwd %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
268 "punpckhwd %[ftmp4], %[ftmp3], %[ftmp2] \n\t"
269 "punpcklwd %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
270 MMI_SDC1(%[ftmp5], $sp, 0x08)
271 MMI_SDC1(%[ftmp0], $sp, 0x18)
272 "dmfc1 %[tmp2], %[ftmp3] \n\t"
273 "dmfc1 %[tmp4], %[ftmp4] \n\t"
274 MMI_LDC1(%[ftmp1], %[block], 0x18)
275 MMI_LDC1(%[ftmp6], %[block], 0x28)
276 MMI_LDC1(%[ftmp2], %[block], 0x38)
277 MMI_LDC1(%[ftmp0], %[block], 0x58)
278 MMI_LDC1(%[ftmp3], %[block], 0x68)
279 MMI_LDC1(%[ftmp4], %[block], 0x78)
280 "mov.d %[ftmp7], %[ftmp1] \n\t"
281 "psrah %[ftmp5], %[ftmp0], %[ftmp8] \n\t"
282 "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
283 "paddh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
284 "paddh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
285 "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
286 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
287 "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
288 "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
289 "psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
290 "psubh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
291 "psrah %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
292 "paddh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
293 "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
294 "psrah %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
295 "psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
296 "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
297 "mov.d %[ftmp4], %[ftmp1] \n\t"
298 "psrah %[ftmp2], %[ftmp5], %[ftmp9] \n\t"
299 "psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
300 "paddh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
301 "psrah %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
302 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
303 "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
304 "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
305 "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
306 "mov.d %[ftmp0], %[ftmp3] \n\t"
307 "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
308 "psrah %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
309 "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
310 "psubh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
311 MMI_LDC1(%[ftmp6], %[block], 0x08)
312 MMI_LDC1(%[ftmp0], %[block], 0x48)
313 "paddh %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
314 "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
315 "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
316 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
317 "paddh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
318 "paddh %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
319 "psubh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
320 "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
321 "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
322 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
323 "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
324 "paddh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
325 "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
326 "paddh %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
327 "paddh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
328 "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
329 "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
330 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
331 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
332 "paddh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
333 MMI_SDC1(%[ftmp3], %[block], 0x08)
334 "psubh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
335 "punpckhhw %[ftmp3], %[ftmp4], %[ftmp7] \n\t"
336 "punpcklhw %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
337 "punpckhhw %[ftmp7], %[ftmp2], %[ftmp1] \n\t"
338 "punpcklhw %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
339 "punpckhwd %[ftmp1], %[ftmp4], %[ftmp2] \n\t"
340 "punpcklwd %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
341 "punpckhwd %[ftmp2], %[ftmp3], %[ftmp7] \n\t"
342 "punpcklwd %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
343 MMI_LDC1(%[ftmp7], %[block], 0x08)
344 "dmfc1 %[tmp5], %[ftmp4] \n\t"
345 "mov.d %[ftmp10], %[ftmp1] \n\t"
346 "mov.d %[ftmp12], %[ftmp3] \n\t"
347 "mov.d %[ftmp14], %[ftmp2] \n\t"
348 "punpckhhw %[ftmp2], %[ftmp0], %[ftmp6] \n\t"
349 "punpcklhw %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
350 "punpckhhw %[ftmp6], %[ftmp5], %[ftmp7] \n\t"
351 "punpcklhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
352 "punpckhwd %[ftmp7], %[ftmp0], %[ftmp5] \n\t"
353 "punpcklwd %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
354 "punpckhwd %[ftmp5], %[ftmp2], %[ftmp6] \n\t"
355 "punpcklwd %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
356 "dmfc1 %[tmp6], %[ftmp0] \n\t"
357 "mov.d %[ftmp11], %[ftmp7] \n\t"
358 "mov.d %[ftmp13], %[ftmp2] \n\t"
359 "mov.d %[ftmp15], %[ftmp5] \n\t"
360 PTR_ADDIU "%[addr0], %[dst], 0x04 \n\t"
361 "mov.d %[ftmp7], %[ftmp10] \n\t"
362 "dmtc1 %[tmp3], %[ftmp6] \n\t"
363 MMI_LDC1(%[ftmp1], $sp, 0x10)
364 "dmtc1 %[tmp1], %[ftmp3] \n\t"
365 "mov.d %[ftmp4], %[ftmp1] \n\t"
366 "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
367 "psrah %[ftmp0], %[ftmp7], %[ftmp8] \n\t"
368 "paddh %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
369 "paddh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
370 "paddh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
371 "paddh %[ftmp0], %[ftmp0], %[ftmp14] \n\t"
372 "paddh %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
373 "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
374 "psubh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
375 "psubh %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
376 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
377 "paddh %[ftmp4], %[ftmp4], %[ftmp14] \n\t"
378 "psubh %[ftmp7], %[ftmp7], %[ftmp14] \n\t"
379 "psrah %[ftmp5], %[ftmp14], %[ftmp8] \n\t"
380 "psubh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
381 "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
382 "mov.d %[ftmp5], %[ftmp1] \n\t"
383 "psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
384 "psrah %[ftmp6], %[ftmp0], %[ftmp9] \n\t"
385 "paddh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
386 "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
387 "psrah %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
388 "psrah %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
389 "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
390 "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
391 "mov.d %[ftmp7], %[ftmp12] \n\t"
392 "psrah %[ftmp2], %[ftmp12], %[ftmp8] \n\t"
393 "psrah %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
394 "paddh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
395 "psubh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
396 MMI_LDC1(%[ftmp3], $sp, 0x00)
397 "dmtc1 %[tmp5], %[ftmp7] \n\t"
398 "paddh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
399 "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
400 "paddh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
401 "psubh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
402 "paddh %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
403 "paddh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
404 "psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
405 "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
406 "paddh %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
407 "psubh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
408 "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
409 "paddh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
410 "psubh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
411 "paddh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
412 "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
413 "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
414 "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
415 "paddh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
416 "psubh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
417 "paddh %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
418 MMI_SDC1(%[ftmp3], $sp, 0x00)
419 "psubh %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
420 MMI_SDC1(%[ftmp0], $sp, 0x10)
421 "dmfc1 %[tmp1], %[ftmp2] \n\t"
422 "pxor %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
423 MMI_SDC1(%[ftmp2], %[block], 0x00)
424 MMI_SDC1(%[ftmp2], %[block], 0x08)
425 MMI_SDC1(%[ftmp2], %[block], 0x10)
426 MMI_SDC1(%[ftmp2], %[block], 0x18)
427 MMI_SDC1(%[ftmp2], %[block], 0x20)
428 MMI_SDC1(%[ftmp2], %[block], 0x28)
429 MMI_SDC1(%[ftmp2], %[block], 0x30)
430 MMI_SDC1(%[ftmp2], %[block], 0x38)
431 MMI_SDC1(%[ftmp2], %[block], 0x40)
432 MMI_SDC1(%[ftmp2], %[block], 0x48)
433 MMI_SDC1(%[ftmp2], %[block], 0x50)
434 MMI_SDC1(%[ftmp2], %[block], 0x58)
435 MMI_SDC1(%[ftmp2], %[block], 0x60)
436 MMI_SDC1(%[ftmp2], %[block], 0x68)
437 MMI_SDC1(%[ftmp2], %[block], 0x70)
438 MMI_SDC1(%[ftmp2], %[block], 0x78)
439 "dli %[tmp3], 0x06 \n\t"
440 "mtc1 %[tmp3], %[ftmp10] \n\t"
441 MMI_ULWC1(%[ftmp3], %[dst], 0x00)
442 MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
443 "psrah %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
444 "psrah %[ftmp4], %[ftmp4], %[ftmp10] \n\t"
445 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
446 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
447 "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
448 "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
449 "packushb %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
450 "packushb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
451 MMI_SWC1(%[ftmp3], %[dst], 0x00)
452 MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
453 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
454 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
455 MMI_ULWC1(%[ftmp3], %[dst], 0x00)
456 MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
457 "psrah %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
458 "psrah %[ftmp1], %[ftmp1], %[ftmp10] \n\t"
459 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
460 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
461 "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
462 "paddh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
463 "packushb %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
464 "packushb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
465 MMI_SWC1(%[ftmp3], %[dst], 0x00)
466 MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
467 MMI_LDC1(%[ftmp5], $sp, 0x00)
468 MMI_LDC1(%[ftmp4], $sp, 0x10)
469 "dmtc1 %[tmp1], %[ftmp6] \n\t"
470 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
471 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
472 MMI_ULWC1(%[ftmp3], %[dst], 0x00)
473 MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
474 "psrah %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
475 "psrah %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
476 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
477 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
478 "paddh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
479 "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
480 "packushb %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
481 "packushb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
482 MMI_SWC1(%[ftmp3], %[dst], 0x00)
483 MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
484 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
485 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
486 MMI_ULWC1(%[ftmp3], %[dst], 0x00)
487 MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
488 "psrah %[ftmp4], %[ftmp4], %[ftmp10] \n\t"
489 "psrah %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
490 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
491 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
492 "paddh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
493 "paddh %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
494 "packushb %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
495 "packushb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
496 MMI_SWC1(%[ftmp3], %[dst], 0x00)
497 MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
498 "dmtc1 %[tmp4], %[ftmp1] \n\t"
499 "dmtc1 %[tmp2], %[ftmp6] \n\t"
500 MMI_LDC1(%[ftmp4], $sp, 0x18)
501 "mov.d %[ftmp5], %[ftmp4] \n\t"
502 "psrah %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
503 "psrah %[ftmp7], %[ftmp11], %[ftmp8] \n\t"
504 "paddh %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
505 "paddh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
506 "paddh %[ftmp7], %[ftmp7], %[ftmp15] \n\t"
507 "paddh %[ftmp4], %[ftmp4], %[ftmp11] \n\t"
508 "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
509 "paddh %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
510 "psubh %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
511 "psubh %[ftmp3], %[ftmp11], %[ftmp1] \n\t"
512 "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
513 "paddh %[ftmp5], %[ftmp5], %[ftmp15] \n\t"
514 "psubh %[ftmp3], %[ftmp3], %[ftmp15] \n\t"
515 "psrah %[ftmp2], %[ftmp15], %[ftmp8] \n\t"
516 "psubh %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
517 "psubh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
518 "mov.d %[ftmp2], %[ftmp4] \n\t"
519 "psrah %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
520 "psrah %[ftmp1], %[ftmp7], %[ftmp9] \n\t"
521 "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
522 "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
523 "psrah %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
524 "psrah %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
525 "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
526 "psubh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
527 "mov.d %[ftmp3], %[ftmp13] \n\t"
528 "psrah %[ftmp0], %[ftmp13], %[ftmp8] \n\t"
529 "psrah %[ftmp7], %[ftmp6], %[ftmp8] \n\t"
530 "paddh %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
531 "psubh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
532 MMI_LDC1(%[ftmp6], $sp, 0x08)
533 "dmtc1 %[tmp6], %[ftmp3] \n\t"
534 "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
535 "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
536 "paddh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
537 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
538 "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
539 "paddh %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
540 "psubh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
541 "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
542 "paddh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
543 "psubh %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
544 "paddh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
545 "paddh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
546 "psubh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
547 "paddh %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
548 "paddh %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
549 "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
550 "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
551 "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
552 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
553 "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
554 MMI_SDC1(%[ftmp6], $sp, 0x08)
555 "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
556 MMI_SDC1(%[ftmp7], $sp, 0x18)
557 "dmfc1 %[tmp2], %[ftmp0] \n\t"
558 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
559 MMI_ULWC1(%[ftmp6], %[addr0], 0x00)
560 MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
561 "psrah %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
562 "psrah %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
563 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
564 "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
565 "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
566 "paddh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
567 "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
568 "packushb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
569 MMI_SWC1(%[ftmp6], %[addr0], 0x00)
570 MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
571 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
572 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
573 MMI_ULWC1(%[ftmp6], %[addr0], 0x00)
574 MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
575 "psrah %[ftmp1], %[ftmp1], %[ftmp10] \n\t"
576 "psrah %[ftmp4], %[ftmp4], %[ftmp10] \n\t"
577 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
578 "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
579 "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
580 "paddh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
581 "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
582 "packushb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
583 MMI_SWC1(%[ftmp6], %[addr0], 0x00)
584 MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
585 MMI_LDC1(%[ftmp2], $sp, 0x08)
586 MMI_LDC1(%[ftmp5], $sp, 0x18)
587 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
588 "dmtc1 %[tmp2], %[ftmp1] \n\t"
589 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
590 MMI_ULWC1(%[ftmp6], %[addr0], 0x00)
591 MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
592 "psrah %[ftmp3], %[ftmp3], %[ftmp10] \n\t"
593 "psrah %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
594 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
595 "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
596 "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
597 "paddh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
598 "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
599 "packushb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
600 MMI_SWC1(%[ftmp6], %[addr0], 0x00)
601 MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
602 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
603 PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
604 MMI_ULWC1(%[ftmp6], %[addr0], 0x00)
605 MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
606 "psrah %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
607 "psrah %[ftmp1], %[ftmp1], %[ftmp10] \n\t"
608 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
609 "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
610 "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
611 "paddh %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
612 "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
613 "packushb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
614 MMI_SWC1(%[ftmp6], %[addr0], 0x00)
615 MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
616 PTR_ADDIU "$sp, $sp, 0x20 \n\t"
617 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
618 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
619 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
620 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
621 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
622 [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
623 [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),
624 [ftmp14]"=&f"(ftmp[14]), [ftmp15]"=&f"(ftmp[15]),
625 [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
626 [tmp2]"=&r"(tmp[2]), [tmp3]"=&r"(tmp[3]),
627 [tmp4]"=&r"(tmp[4]), [tmp5]"=&r"(tmp[5]),
628 [tmp6]"=&r"(tmp[6]),
629 RESTRICT_ASM_LOW32
630 RESTRICT_ASM_ADDRT
631 [addr0]"=&r"(addr[0])
632 : [dst]"r"(dst), [block]"r"(block),
633 [stride]"r"((mips_reg)stride)
634 : "memory"
635 );
636
637 }
638
ff_h264_idct_dc_add_8_mmi(uint8_t * dst,int16_t * block,int stride)639 void ff_h264_idct_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
640 {
641 int dc = (block[0] + 32) >> 6;
642 double ftmp[6];
643 DECLARE_VAR_LOW32;
644
645 block[0] = 0;
646
647 __asm__ volatile (
648 "mtc1 %[dc], %[ftmp5] \n\t"
649 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
650 "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
651 MMI_ULWC1(%[ftmp1], %[dst0], 0x00)
652 MMI_ULWC1(%[ftmp2], %[dst1], 0x00)
653 MMI_ULWC1(%[ftmp3], %[dst2], 0x00)
654 MMI_ULWC1(%[ftmp4], %[dst3], 0x00)
655 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
656 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
657 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
658 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
659 "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
660 "paddsh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
661 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
662 "paddsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
663 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
664 "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
665 "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
666 "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
667 MMI_SWC1(%[ftmp1], %[dst0], 0x00)
668 MMI_SWC1(%[ftmp2], %[dst1], 0x00)
669 MMI_SWC1(%[ftmp3], %[dst2], 0x00)
670 MMI_SWC1(%[ftmp4], %[dst3], 0x00)
671 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
672 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
673 [ftmp4]"=&f"(ftmp[4]),
674 RESTRICT_ASM_LOW32
675 [ftmp5]"=&f"(ftmp[5])
676 : [dst0]"r"(dst), [dst1]"r"(dst+stride),
677 [dst2]"r"(dst+2*stride), [dst3]"r"(dst+3*stride),
678 [dc]"r"(dc)
679 : "memory"
680 );
681 }
682
ff_h264_idct8_dc_add_8_mmi(uint8_t * dst,int16_t * block,int stride)683 void ff_h264_idct8_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
684 {
685 int dc = (block[0] + 32) >> 6;
686 double ftmp[10];
687 DECLARE_VAR_ALL64;
688
689 block[0] = 0;
690
691 __asm__ volatile (
692 "mtc1 %[dc], %[ftmp5] \n\t"
693 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
694 "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
695 MMI_LDC1(%[ftmp1], %[dst0], 0x00)
696 MMI_LDC1(%[ftmp2], %[dst1], 0x00)
697 MMI_LDC1(%[ftmp3], %[dst2], 0x00)
698 MMI_LDC1(%[ftmp4], %[dst3], 0x00)
699 "punpckhbh %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
700 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
701 "punpckhbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t"
702 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
703 "punpckhbh %[ftmp8], %[ftmp3], %[ftmp0] \n\t"
704 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
705 "punpckhbh %[ftmp9], %[ftmp4], %[ftmp0] \n\t"
706 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
707 "paddsh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
708 "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
709 "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
710 "paddsh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
711 "paddsh %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
712 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
713 "paddsh %[ftmp9], %[ftmp9], %[ftmp5] \n\t"
714 "paddsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
715 "packushb %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
716 "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
717 "packushb %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
718 "packushb %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
719 MMI_SDC1(%[ftmp1], %[dst0], 0x00)
720 MMI_SDC1(%[ftmp2], %[dst1], 0x00)
721 MMI_SDC1(%[ftmp3], %[dst2], 0x00)
722 MMI_SDC1(%[ftmp4], %[dst3], 0x00)
723
724 MMI_LDC1(%[ftmp1], %[dst4], 0x00)
725 MMI_LDC1(%[ftmp2], %[dst5], 0x00)
726 MMI_LDC1(%[ftmp3], %[dst6], 0x00)
727 MMI_LDC1(%[ftmp4], %[dst7], 0x00)
728 "punpckhbh %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
729 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
730 "punpckhbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t"
731 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
732 "punpckhbh %[ftmp8], %[ftmp3], %[ftmp0] \n\t"
733 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
734 "punpckhbh %[ftmp9], %[ftmp4], %[ftmp0] \n\t"
735 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
736 "paddsh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
737 "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
738 "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
739 "paddsh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
740 "paddsh %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
741 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
742 "paddsh %[ftmp9], %[ftmp9], %[ftmp5] \n\t"
743 "paddsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
744 "packushb %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
745 "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
746 "packushb %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
747 "packushb %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
748 MMI_SDC1(%[ftmp1], %[dst4], 0x00)
749 MMI_SDC1(%[ftmp2], %[dst5], 0x00)
750 MMI_SDC1(%[ftmp3], %[dst6], 0x00)
751 MMI_SDC1(%[ftmp4], %[dst7], 0x00)
752 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
753 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
754 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
755 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
756 [ftmp8]"=&f"(ftmp[8]),
757 RESTRICT_ASM_ALL64
758 [ftmp9]"=&f"(ftmp[9])
759 : [dst0]"r"(dst), [dst1]"r"(dst+stride),
760 [dst2]"r"(dst+2*stride), [dst3]"r"(dst+3*stride),
761 [dst4]"r"(dst+4*stride), [dst5]"r"(dst+5*stride),
762 [dst6]"r"(dst+6*stride), [dst7]"r"(dst+7*stride),
763 [dc]"r"(dc)
764 : "memory"
765 );
766 }
767
ff_h264_idct_add16_8_mmi(uint8_t * dst,const int * block_offset,int16_t * block,int stride,const uint8_t nnzc[5* 8])768 void ff_h264_idct_add16_8_mmi(uint8_t *dst, const int *block_offset,
769 int16_t *block, int stride,
770 const uint8_t nnzc[5 * 8])
771 {
772 int i;
773 for(i=0; i<16; i++){
774 int nnz = nnzc[ scan8[i] ];
775 if(nnz){
776 if(nnz==1 && ((int16_t*)block)[i*16])
777 ff_h264_idct_dc_add_8_mmi(dst + block_offset[i], block + i*16,
778 stride);
779 else
780 ff_h264_idct_add_8_mmi(dst + block_offset[i], block + i*16,
781 stride);
782 }
783 }
784 }
785
ff_h264_idct_add16intra_8_mmi(uint8_t * dst,const int * block_offset,int16_t * block,int stride,const uint8_t nnzc[5* 8])786 void ff_h264_idct_add16intra_8_mmi(uint8_t *dst, const int *block_offset,
787 int16_t *block, int stride, const uint8_t nnzc[5 * 8])
788 {
789 int i;
790 for(i=0; i<16; i++){
791 if(nnzc[ scan8[i] ])
792 ff_h264_idct_add_8_mmi(dst + block_offset[i], block + i*16, stride);
793 else if(((int16_t*)block)[i*16])
794 ff_h264_idct_dc_add_8_mmi(dst + block_offset[i], block + i*16,
795 stride);
796 }
797 }
798
ff_h264_idct8_add4_8_mmi(uint8_t * dst,const int * block_offset,int16_t * block,int stride,const uint8_t nnzc[5* 8])799 void ff_h264_idct8_add4_8_mmi(uint8_t *dst, const int *block_offset,
800 int16_t *block, int stride, const uint8_t nnzc[5 * 8])
801 {
802 int i;
803 for(i=0; i<16; i+=4){
804 int nnz = nnzc[ scan8[i] ];
805 if(nnz){
806 if(nnz==1 && ((int16_t*)block)[i*16])
807 ff_h264_idct8_dc_add_8_mmi(dst + block_offset[i],
808 block + i*16, stride);
809 else
810 ff_h264_idct8_add_8_mmi(dst + block_offset[i], block + i*16,
811 stride);
812 }
813 }
814 }
815
ff_h264_idct_add8_8_mmi(uint8_t ** dest,const int * block_offset,int16_t * block,int stride,const uint8_t nnzc[15* 8])816 void ff_h264_idct_add8_8_mmi(uint8_t **dest, const int *block_offset,
817 int16_t *block, int stride, const uint8_t nnzc[15*8])
818 {
819 int i, j;
820 for(j=1; j<3; j++){
821 for(i=j*16; i<j*16+4; i++){
822 if(nnzc[ scan8[i] ])
823 ff_h264_idct_add_8_mmi(dest[j-1] + block_offset[i],
824 block + i*16, stride);
825 else if(((int16_t*)block)[i*16])
826 ff_h264_idct_dc_add_8_mmi(dest[j-1] + block_offset[i],
827 block + i*16, stride);
828 }
829 }
830 }
831
ff_h264_idct_add8_422_8_mmi(uint8_t ** dest,const int * block_offset,int16_t * block,int stride,const uint8_t nnzc[15* 8])832 void ff_h264_idct_add8_422_8_mmi(uint8_t **dest, const int *block_offset,
833 int16_t *block, int stride, const uint8_t nnzc[15*8])
834 {
835 int i, j;
836
837 for(j=1; j<3; j++){
838 for(i=j*16; i<j*16+4; i++){
839 if(nnzc[ scan8[i] ])
840 ff_h264_idct_add_8_mmi(dest[j-1] + block_offset[i],
841 block + i*16, stride);
842 else if(((int16_t*)block)[i*16])
843 ff_h264_idct_dc_add_8_mmi(dest[j-1] + block_offset[i],
844 block + i*16, stride);
845 }
846 }
847
848 for(j=1; j<3; j++){
849 for(i=j*16+4; i<j*16+8; i++){
850 if(nnzc[ scan8[i+4] ])
851 ff_h264_idct_add_8_mmi(dest[j-1] + block_offset[i+4],
852 block + i*16, stride);
853 else if(((int16_t*)block)[i*16])
854 ff_h264_idct_dc_add_8_mmi(dest[j-1] + block_offset[i+4],
855 block + i*16, stride);
856 }
857 }
858 }
859
/*
 * Transform a 4x4 array of DC coefficients, scale it by qmul and scatter
 * the sixteen results into `output`.
 *
 * output  destination; one int16_t is written at every byte offset
 *         0x00, 0x20, ..., 0x1e0 (i.e. every 16th int16_t)
 * input   16 contiguous int16_t values, read as four 8-byte rows
 * qmul    scaling factor; values above 0x7fff take a separate path
 *
 * The asm runs under ".set noreorder", so the instruction following each
 * branch/jump is its delay slot and executes regardless of the branch.
 * Note that the `input` and `qmul` operands are reused as scratch
 * registers once their original values are no longer needed.
 */
void ff_h264_luma_dc_dequant_idct_8_mmi(int16_t *output, int16_t *input,
        int qmul)
{
    double ftmp[10];
    uint64_t tmp[2];
    DECLARE_VAR_ALL64;

    __asm__ volatile (
        ".set noreorder \n\t"
        /* ftmp8 = 8 (shift used on the small-qmul path),
         * ftmp9 = 32 (used to fetch the upper word of a 64-bit register);
         * interleaved with loading the four input rows into ftmp0..ftmp3. */
        "dli %[tmp0], 0x08 \n\t"
        MMI_LDC1(%[ftmp3], %[input], 0x18)
        "mtc1 %[tmp0], %[ftmp8] \n\t"
        MMI_LDC1(%[ftmp2], %[input], 0x10)
        "dli %[tmp0], 0x20 \n\t"
        MMI_LDC1(%[ftmp1], %[input], 0x08)
        "mtc1 %[tmp0], %[ftmp9] \n\t"
        MMI_LDC1(%[ftmp0], %[input], 0x00)
        /* First pass: add/subtract butterflies across the four rows. */
        "mov.d %[ftmp4], %[ftmp3] \n\t"
        "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
        "psubh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
        "mov.d %[ftmp4], %[ftmp1] \n\t"
        "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
        "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
        "mov.d %[ftmp4], %[ftmp3] \n\t"
        "paddh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
        "psubh %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
        "mov.d %[ftmp4], %[ftmp2] \n\t"
        "paddh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
        "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
        /* Halfword/word interleaves: rearrange the 4x4 halfword matrix
         * (presumably a transpose) ahead of the second pass. */
        "mov.d %[ftmp4], %[ftmp3] \n\t"
        "punpcklhw %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
        "punpckhhw %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
        "punpckhhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
        "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
        "punpckhwd %[ftmp2], %[ftmp3], %[ftmp0] \n\t"
        "punpcklwd %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
        "mov.d %[ftmp0], %[ftmp4] \n\t"
        "punpcklwd %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
        "punpckhwd %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
        /* Second pass of butterflies. */
        "mov.d %[ftmp1], %[ftmp0] \n\t"
        "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
        "psubh %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
        "mov.d %[ftmp1], %[ftmp2] \n\t"
        "paddh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
        "psubh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
        "mov.d %[ftmp1], %[ftmp0] \n\t"
        "paddh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
        "psubh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
        "mov.d %[ftmp1], %[ftmp4] \n\t"
        /* tmp0 = qmul - 0x7fff; branch to 1: when qmul > 0x7fff.
         * The final paddh/psubh of the butterfly are interleaved with the
         * test; the psubh sits in the branch delay slot. */
        "daddi %[tmp0], %[qmul], -0x7fff \n\t"
        "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
        "bgtz %[tmp0], 1f \n\t"
        "psubh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
        /* ---- qmul <= 0x7fff ----
         * qmul += 0x80 << 16, so the 32-bit word holds {qmul, 0x80} as two
         * halfwords. Coefficients are interleaved with ff_pw_1 (presumably
         * a vector of 16-bit ones), so pmaddhw yields coef*qmul + 128 per
         * 32-bit lane; psraw by ftmp8 then shifts right by 8. */
        "ori %[tmp0], $0, 0x80 \n\t"
        "dsll %[tmp0], %[tmp0], 0x10 \n\t"
        "punpckhhw %[ftmp1], %[ftmp0], %[ff_pw_1] \n\t"
        "daddu %[qmul], %[qmul], %[tmp0] \n\t"
        "punpcklhw %[ftmp0], %[ftmp0], %[ff_pw_1] \n\t"
        "punpckhhw %[ftmp5], %[ftmp2], %[ff_pw_1] \n\t"
        "punpcklhw %[ftmp2], %[ftmp2], %[ff_pw_1] \n\t"
        "mtc1 %[qmul], %[ftmp7] \n\t"
        "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
        "pmaddhw %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
        "pmaddhw %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
        "pmaddhw %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
        "pmaddhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
        "psraw %[ftmp0], %[ftmp0], %[ftmp8] \n\t"
        "psraw %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
        "psraw %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
        "psraw %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
        "packsswh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
        "packsswh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
        /* Scatter four halfwords per register: the low word goes through
         * tmp1, the high word (after a 32-bit right shift) through the
         * now-free `input` register; each is stored with sh, shifted
         * right by 16 and stored again. */
        "dmfc1 %[tmp1], %[ftmp0] \n\t"
        "ssrld %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
        "mfc1 %[input], %[ftmp0] \n\t"
        "sh %[tmp1], 0x00(%[output]) \n\t"
        "sh %[input], 0x80(%[output]) \n\t"
        "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
        PTR_SRL "%[input], %[input], 0x10 \n\t"
        "sh %[tmp1], 0x20(%[output]) \n\t"
        "sh %[input], 0xa0(%[output]) \n\t"
        "dmfc1 %[tmp1], %[ftmp2] \n\t"
        "ssrld %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
        "mfc1 %[input], %[ftmp2] \n\t"
        "sh %[tmp1], 0x40(%[output]) \n\t"
        "sh %[input], 0xc0(%[output]) \n\t"
        "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
        PTR_SRL "%[input], %[input], 0x10 \n\t"
        "sh %[tmp1], 0x60(%[output]) \n\t"
        "sh %[input], 0xe0(%[output]) \n\t"
        /* Same scale/round/shift/pack for the remaining two registers. */
        "punpckhhw %[ftmp1], %[ftmp3], %[ff_pw_1] \n\t"
        "punpcklhw %[ftmp3], %[ftmp3], %[ff_pw_1] \n\t"
        "punpckhhw %[ftmp5], %[ftmp4], %[ff_pw_1] \n\t"
        "punpcklhw %[ftmp4], %[ftmp4], %[ff_pw_1] \n\t"
        "mtc1 %[qmul], %[ftmp7] \n\t"
        "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
        "pmaddhw %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
        "pmaddhw %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
        "pmaddhw %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
        "pmaddhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
        "psraw %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
        "psraw %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
        "psraw %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
        "psraw %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
        "packsswh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
        "packsswh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
        "dmfc1 %[tmp1], %[ftmp3] \n\t"
        "ssrld %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
        "mfc1 %[input], %[ftmp3] \n\t"
        "sh %[tmp1], 0x100(%[output]) \n\t"
        "sh %[input], 0x180(%[output]) \n\t"
        "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
        PTR_SRL "%[input], %[input], 0x10 \n\t"
        "sh %[tmp1], 0x120(%[output]) \n\t"
        "sh %[input], 0x1a0(%[output]) \n\t"
        "dmfc1 %[tmp1], %[ftmp4] \n\t"
        "ssrld %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
        "mfc1 %[input], %[ftmp4] \n\t"
        "sh %[tmp1], 0x140(%[output]) \n\t"
        "sh %[input], 0x1c0(%[output]) \n\t"
        "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
        PTR_SRL "%[input], %[input], 0x10 \n\t"
        "sh %[tmp1], 0x160(%[output]) \n\t"
        /* Jump to the exit; the last store executes in the delay slot. */
        "j 2f \n\t"
        "sh %[input], 0x1e0(%[output]) \n\t"
        /* ---- qmul > 0x7fff ----
         * qmul does not fit a signed halfword, so it is pre-shifted right
         * and the final arithmetic shift (ftmp6) is reduced to match. */
        "1: \n\t"
        "ori %[tmp0], $0, 0x1f \n\t"
        /* NOTE(review): tmp1 is only written here when HAVE_LOONGSON3 is
         * set; the HAVE_LOONGSON2 branch is empty, so the dsubu below would
         * read an unset tmp1 in that configuration - confirm. */
#if HAVE_LOONGSON3
        "clz %[tmp1], %[qmul] \n\t"
#elif HAVE_LOONGSON2
#endif
        "ori %[input], $0, 0x07 \n\t"
        /* tmp1 = 0x1f - clz(qmul) */
        "dsubu %[tmp1], %[tmp0], %[tmp1] \n\t"
        "ori %[tmp0], $0, 0x80 \n\t"
        "dsll %[tmp0], %[tmp0], 0x10 \n\t"
        "daddu %[qmul], %[qmul], %[tmp0] \n\t"
        /* movn copies 7 into tmp1 whenever tmp1 - 7 is non-zero.
         * NOTE(review): as written tmp1 is therefore 7 either way, which
         * makes the pre-shift 7 and the final shift 1 - confirm intent. */
        "dsubu %[tmp0], %[tmp1], %[input] \n\t"
        "movn %[tmp1], %[input], %[tmp0] \n\t"
        PTR_ADDIU "%[input], %[input], 0x01 \n\t"
        "andi %[tmp0], %[tmp1], 0xff \n\t"
        "srlv %[qmul], %[qmul], %[tmp0] \n\t"
        /* ftmp6 = 8 - tmp1: the remaining right shift after the multiply. */
        PTR_SUBU "%[input], %[input], %[tmp1] \n\t"
        "mtc1 %[input], %[ftmp6] \n\t"
        "punpckhhw %[ftmp1], %[ftmp0], %[ff_pw_1] \n\t"
        "punpcklhw %[ftmp0], %[ftmp0], %[ff_pw_1] \n\t"
        "punpckhhw %[ftmp5], %[ftmp2], %[ff_pw_1] \n\t"
        "punpcklhw %[ftmp2], %[ftmp2], %[ff_pw_1] \n\t"
        "mtc1 %[qmul], %[ftmp7] \n\t"
        "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
        "pmaddhw %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
        "pmaddhw %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
        "pmaddhw %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
        "pmaddhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
        "psraw %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
        "psraw %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
        "psraw %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
        "psraw %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
        "packsswh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
        "packsswh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
        /* Same scatter pattern as on the small-qmul path, with the
         * instructions interleaved differently. */
        "dmfc1 %[tmp1], %[ftmp0] \n\t"
        "ssrld %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
        "sh %[tmp1], 0x00(%[output]) \n\t"
        "mfc1 %[input], %[ftmp0] \n\t"
        "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
        "sh %[input], 0x80(%[output]) \n\t"
        "sh %[tmp1], 0x20(%[output]) \n\t"
        PTR_SRL "%[input], %[input], 0x10 \n\t"
        "dmfc1 %[tmp1], %[ftmp2] \n\t"
        "sh %[input], 0xa0(%[output]) \n\t"
        "ssrld %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
        "sh %[tmp1], 0x40(%[output]) \n\t"
        "mfc1 %[input], %[ftmp2] \n\t"
        "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
        "sh %[input], 0xc0(%[output]) \n\t"
        "sh %[tmp1], 0x60(%[output]) \n\t"
        PTR_SRL "%[input], %[input], 0x10 \n\t"
        "sh %[input], 0xe0(%[output]) \n\t"
        "punpckhhw %[ftmp1], %[ftmp3], %[ff_pw_1] \n\t"
        "punpcklhw %[ftmp3], %[ftmp3], %[ff_pw_1] \n\t"
        "punpckhhw %[ftmp5], %[ftmp4], %[ff_pw_1] \n\t"
        "punpcklhw %[ftmp4], %[ftmp4], %[ff_pw_1] \n\t"
        "mtc1 %[qmul], %[ftmp7] \n\t"
        "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
        "pmaddhw %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
        "pmaddhw %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
        "pmaddhw %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
        "pmaddhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
        "psraw %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
        "psraw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
        "psraw %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
        "psraw %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
        "packsswh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
        "packsswh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
        "dmfc1 %[tmp1], %[ftmp3] \n\t"
        "ssrld %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
        "mfc1 %[input], %[ftmp3] \n\t"
        "sh %[tmp1], 0x100(%[output]) \n\t"
        "sh %[input], 0x180(%[output]) \n\t"
        "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
        PTR_SRL "%[input], %[input], 0x10 \n\t"
        "sh %[tmp1], 0x120(%[output]) \n\t"
        "sh %[input], 0x1a0(%[output]) \n\t"
        "dmfc1 %[tmp1], %[ftmp4] \n\t"
        "ssrld %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
        "mfc1 %[input], %[ftmp4] \n\t"
        "sh %[tmp1], 0x140(%[output]) \n\t"
        "sh %[input], 0x1c0(%[output]) \n\t"
        "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
        PTR_SRL "%[input], %[input], 0x10 \n\t"
        "sh %[tmp1], 0x160(%[output]) \n\t"
        "sh %[input], 0x1e0(%[output]) \n\t"
        "2: \n\t"
        ".set reorder \n\t"
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
          [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
          [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
          RESTRICT_ASM_ALL64
          /* read-write: input and qmul are clobbered as scratch above */
          [output]"+&r"(output), [input]"+&r"(input),
          [qmul]"+&r"(qmul)
        : [ff_pw_1]"f"(ff_pw_1.f)
        : "memory"
    );
}
1086
/*
 * In-place 2x4 DC transform and dequantisation.
 *
 * The eight DC values live at block[32*row + 16*col] for row = 0..3 and
 * col = 0..1. They are combined with add/subtract butterflies, multiplied
 * by qmul, rounded with +128 and shifted right by 8, then written back to
 * the same eight positions. No other element of block is touched.
 *
 * block  coefficient array; indices 0, 16, 32, ..., 112 are read/written
 * qmul   dequantisation multiplier
 *
 * All intermediate arithmetic is carried out on unsigned values so that a
 * large qmul wraps modulo 2^32 instead of triggering signed-overflow
 * undefined behaviour; the value is converted back to int only for the
 * final arithmetic right shift.
 */
void ff_h264_chroma422_dc_dequant_idct_8_mmi(int16_t *block, int qmul)
{
    unsigned sum[4];   /* per row: left + right */
    unsigned diff[4];  /* per row: left - right */
    int row, col;

    /* Read every input before the first store: outputs alias inputs. */
    for (row = 0; row < 4; row++) {
        const int left  = block[32 * row];
        const int right = block[32 * row + 16];

        sum[row]  = (unsigned)left + right;
        diff[row] = (unsigned)left - right;
    }

    /* Column 0 is built from the row sums, column 1 from the differences. */
    for (col = 0; col < 2; col++) {
        const unsigned *v  = col ? diff : sum;
        const unsigned  z0 = v[0] + v[2];
        const unsigned  z1 = v[0] - v[2];
        const unsigned  z2 = v[1] - v[3];
        const unsigned  z3 = v[1] + v[3];
        int16_t *out = block + 16 * col;

        out[ 0] = (int)((z0 + z3) * qmul + 128) >> 8;
        out[32] = (int)((z1 + z2) * qmul + 128) >> 8;
        out[64] = (int)((z1 - z2) * qmul + 128) >> 8;
        out[96] = (int)((z0 - z3) * qmul + 128) >> 8;
    }
}
1119
/*
 * In-place 2x2 DC transform and dequantisation.
 *
 * The four DC values at block[0], block[16], block[32] and block[48] are
 * combined with add/subtract butterflies, multiplied by qmul and shifted
 * right by 7 (no rounding term), then written back to the same positions.
 *
 * block  coefficient array; indices 0, 16, 32 and 48 are read/written
 * qmul   dequantisation multiplier
 *
 * The sums and the multiplication are performed on unsigned values so that
 * a large qmul wraps modulo 2^32 instead of triggering signed-overflow
 * undefined behaviour; the product is converted back to int only for the
 * final arithmetic right shift.
 */
void ff_h264_chroma_dc_dequant_idct_8_mmi(int16_t *block, int qmul)
{
    /* Top row (indices 0/16) and bottom row (indices 32/48). */
    const unsigned top_sum  = block[0]  + (unsigned)block[16];
    const unsigned top_diff = block[0]  - (unsigned)block[16];
    const unsigned bot_sum  = block[32] + (unsigned)block[48];
    const unsigned bot_diff = block[32] - (unsigned)block[48];

    block[0]  = (int)((top_sum  + bot_sum)  * qmul) >> 7;
    block[16] = (int)((top_diff + bot_diff) * qmul) >> 7;
    block[32] = (int)((top_sum  - bot_sum)  * qmul) >> 7;
    block[48] = (int)((top_diff - bot_diff) * qmul) >> 7;
}
1133
ff_h264_weight_pixels16_8_mmi(uint8_t * block,ptrdiff_t stride,int height,int log2_denom,int weight,int offset)1134 void ff_h264_weight_pixels16_8_mmi(uint8_t *block, ptrdiff_t stride, int height,
1135 int log2_denom, int weight, int offset)
1136 {
1137 int y;
1138 double ftmp[8];
1139 DECLARE_VAR_ALL64;
1140
1141 offset <<= log2_denom;
1142
1143 if (log2_denom)
1144 offset += 1 << (log2_denom - 1);
1145
1146 for (y=0; y<height; y++, block+=stride) {
1147 __asm__ volatile (
1148 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1149 MMI_LDC1(%[ftmp1], %[block0], 0x00)
1150 MMI_LDC1(%[ftmp2], %[block1], 0x00)
1151 "mtc1 %[weight], %[ftmp3] \n\t"
1152 "mtc1 %[offset], %[ftmp4] \n\t"
1153 "mtc1 %[log2_denom], %[ftmp5] \n\t"
1154 "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1155 "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1156 "punpckhbh %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
1157 "punpckhbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t"
1158 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1159 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1160 "pmullh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1161 "pmullh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
1162 "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1163 "pmullh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
1164 "paddsh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1165 "paddsh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
1166 "paddsh %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
1167 "paddsh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1168 "psrah %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1169 "psrah %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1170 "psrah %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1171 "psrah %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
1172 "packushb %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1173 "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1174 MMI_SDC1(%[ftmp1], %[block0], 0x00)
1175 MMI_SDC1(%[ftmp2], %[block1], 0x00)
1176 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1177 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1178 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1179 [ftmp6]"=&f"(ftmp[6]),
1180 RESTRICT_ASM_ALL64
1181 [ftmp7]"=&f"(ftmp[7])
1182 : [block0]"r"(block), [block1]"r"(block+8),
1183 [weight]"r"(weight), [offset]"r"(offset),
1184 [log2_denom]"r"(log2_denom)
1185 : "memory"
1186 );
1187 }
1188 }
1189
ff_h264_biweight_pixels16_8_mmi(uint8_t * dst,uint8_t * src,ptrdiff_t stride,int height,int log2_denom,int weightd,int weights,int offset)1190 void ff_h264_biweight_pixels16_8_mmi(uint8_t *dst, uint8_t *src,
1191 ptrdiff_t stride, int height, int log2_denom, int weightd, int weights,
1192 int offset)
1193 {
1194 int y;
1195 double ftmp[9];
1196 DECLARE_VAR_ALL64;
1197
1198 offset = ((offset + 1) | 1) << log2_denom;
1199
1200 for (y=0; y<height; y++, dst+=stride, src+=stride) {
1201 __asm__ volatile (
1202 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1203 MMI_LDC1(%[ftmp1], %[src0], 0x00)
1204 MMI_LDC1(%[ftmp2], %[dst0], 0x00)
1205 "mtc1 %[weights], %[ftmp3] \n\t"
1206 "mtc1 %[weightd], %[ftmp4] \n\t"
1207 "mtc1 %[offset], %[ftmp5] \n\t"
1208 "mtc1 %[log2_denom], %[ftmp6] \n\t"
1209 "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1210 "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1211 "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1212 "punpckhbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t"
1213 "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t"
1214 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1215 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1216 "pmullh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
1217 "pmullh %[ftmp8], %[ftmp8], %[ftmp4] \n\t"
1218 "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1219 "pmullh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1220 "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1221 "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1222 "paddsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1223 "paddsh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1224 "psrah %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1225 "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1226 "packushb %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1227 MMI_SDC1(%[ftmp1], %[dst0], 0x00)
1228 MMI_LDC1(%[ftmp1], %[src1], 0x00)
1229 MMI_LDC1(%[ftmp2], %[dst1], 0x00)
1230 "punpckhbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t"
1231 "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t"
1232 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1233 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1234 "pmullh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
1235 "pmullh %[ftmp8], %[ftmp8], %[ftmp4] \n\t"
1236 "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1237 "pmullh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1238 "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1239 "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1240 "paddsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1241 "paddsh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1242 "psrah %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1243 "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1244 "packushb %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1245 MMI_SDC1(%[ftmp1], %[dst1], 0x00)
1246 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1247 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1248 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1249 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1250 RESTRICT_ASM_ALL64
1251 [ftmp8]"=&f"(ftmp[8])
1252 : [dst0]"r"(dst), [dst1]"r"(dst+8),
1253 [src0]"r"(src), [src1]"r"(src+8),
1254 [weights]"r"(weights), [weightd]"r"(weightd),
1255 [offset]"r"(offset), [log2_denom]"r"(log2_denom+1)
1256 : "memory"
1257 );
1258 }
1259 }
1260
ff_h264_weight_pixels8_8_mmi(uint8_t * block,ptrdiff_t stride,int height,int log2_denom,int weight,int offset)1261 void ff_h264_weight_pixels8_8_mmi(uint8_t *block, ptrdiff_t stride, int height,
1262 int log2_denom, int weight, int offset)
1263 {
1264 int y;
1265 double ftmp[6];
1266 DECLARE_VAR_ALL64;
1267
1268 offset <<= log2_denom;
1269
1270 if (log2_denom)
1271 offset += 1 << (log2_denom - 1);
1272
1273 for (y=0; y<height; y++, block+=stride) {
1274 __asm__ volatile (
1275 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1276 MMI_LDC1(%[ftmp1], %[block], 0x00)
1277 "mtc1 %[weight], %[ftmp2] \n\t"
1278 "mtc1 %[offset], %[ftmp3] \n\t"
1279 "mtc1 %[log2_denom], %[ftmp5] \n\t"
1280 "pshufh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1281 "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1282 "punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t"
1283 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1284 "pmullh %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
1285 "pmullh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1286 "paddsh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
1287 "paddsh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1288 "psrah %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1289 "psrah %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1290 "packushb %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
1291 MMI_SDC1(%[ftmp1], %[block], 0x00)
1292 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1293 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1294 [ftmp4]"=&f"(ftmp[4]),
1295 RESTRICT_ASM_ALL64
1296 [ftmp5]"=&f"(ftmp[5])
1297 : [block]"r"(block), [weight]"r"(weight),
1298 [offset]"r"(offset), [log2_denom]"r"(log2_denom)
1299 : "memory"
1300 );
1301 }
1302 }
1303
ff_h264_biweight_pixels8_8_mmi(uint8_t * dst,uint8_t * src,ptrdiff_t stride,int height,int log2_denom,int weightd,int weights,int offset)1304 void ff_h264_biweight_pixels8_8_mmi(uint8_t *dst, uint8_t *src,
1305 ptrdiff_t stride, int height, int log2_denom, int weightd, int weights,
1306 int offset)
1307 {
1308 int y;
1309 double ftmp[9];
1310 DECLARE_VAR_ALL64;
1311
1312 offset = ((offset + 1) | 1) << log2_denom;
1313
1314 for (y=0; y<height; y++, dst+=stride, src+=stride) {
1315 __asm__ volatile (
1316 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1317 MMI_LDC1(%[ftmp1], %[src], 0x00)
1318 MMI_LDC1(%[ftmp2], %[dst], 0x00)
1319 "mtc1 %[weights], %[ftmp3] \n\t"
1320 "mtc1 %[weightd], %[ftmp4] \n\t"
1321 "mtc1 %[offset], %[ftmp5] \n\t"
1322 "mtc1 %[log2_denom], %[ftmp6] \n\t"
1323 "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1324 "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1325 "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1326 "punpckhbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t"
1327 "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t"
1328 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1329 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1330 "pmullh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
1331 "pmullh %[ftmp8], %[ftmp8], %[ftmp4] \n\t"
1332 "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1333 "pmullh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1334 "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1335 "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1336 "paddsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1337 "paddsh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1338 "psrah %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1339 "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1340 "packushb %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1341 MMI_SDC1(%[ftmp1], %[dst], 0x00)
1342 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1343 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1344 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1345 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1346 RESTRICT_ASM_ALL64
1347 [ftmp8]"=&f"(ftmp[8])
1348 : [dst]"r"(dst), [src]"r"(src),
1349 [weights]"r"(weights), [weightd]"r"(weightd),
1350 [offset]"r"(offset), [log2_denom]"r"(log2_denom+1)
1351 : "memory"
1352 );
1353 }
1354 }
1355
ff_h264_weight_pixels4_8_mmi(uint8_t * block,ptrdiff_t stride,int height,int log2_denom,int weight,int offset)1356 void ff_h264_weight_pixels4_8_mmi(uint8_t *block, ptrdiff_t stride, int height,
1357 int log2_denom, int weight, int offset)
1358 {
1359 int y;
1360 double ftmp[5];
1361 DECLARE_VAR_LOW32;
1362
1363 offset <<= log2_denom;
1364
1365 if (log2_denom)
1366 offset += 1 << (log2_denom - 1);
1367
1368 for (y=0; y<height; y++, block+=stride) {
1369 __asm__ volatile (
1370 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1371 MMI_ULWC1(%[ftmp1], %[block], 0x00)
1372 "mtc1 %[weight], %[ftmp2] \n\t"
1373 "mtc1 %[offset], %[ftmp3] \n\t"
1374 "mtc1 %[log2_denom], %[ftmp4] \n\t"
1375 "pshufh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1376 "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1377 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1378 "pmullh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1379 "paddsh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1380 "psrah %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
1381 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1382 MMI_SWC1(%[ftmp1], %[block], 0x00)
1383 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1384 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1385 RESTRICT_ASM_LOW32
1386 [ftmp4]"=&f"(ftmp[4])
1387 : [block]"r"(block), [weight]"r"(weight),
1388 [offset]"r"(offset), [log2_denom]"r"(log2_denom)
1389 : "memory"
1390 );
1391 }
1392 }
1393
ff_h264_biweight_pixels4_8_mmi(uint8_t * dst,uint8_t * src,ptrdiff_t stride,int height,int log2_denom,int weightd,int weights,int offset)1394 void ff_h264_biweight_pixels4_8_mmi(uint8_t *dst, uint8_t *src,
1395 ptrdiff_t stride, int height, int log2_denom, int weightd, int weights,
1396 int offset)
1397 {
1398 int y;
1399 double ftmp[7];
1400 DECLARE_VAR_LOW32;
1401
1402 offset = ((offset + 1) | 1) << log2_denom;
1403
1404 for (y=0; y<height; y++, dst+=stride, src+=stride) {
1405 __asm__ volatile (
1406 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1407 MMI_ULWC1(%[ftmp1], %[src], 0x00)
1408 MMI_ULWC1(%[ftmp2], %[dst], 0x00)
1409 "mtc1 %[weight], %[ftmp3] \n\t"
1410 "mtc1 %[weightd], %[ftmp4] \n\t"
1411 "mtc1 %[offset], %[ftmp5] \n\t"
1412 "mtc1 %[log2_denom], %[ftmp6] \n\t"
1413 "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1414 "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1415 "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1416 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1417 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1418 "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1419 "pmullh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1420 "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1421 "paddsh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1422 "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1423 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1424 MMI_SWC1(%[ftmp1], %[dst], 0x00)
1425 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1426 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1427 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1428 RESTRICT_ASM_LOW32
1429 [ftmp6]"=&f"(ftmp[6])
1430 : [dst]"r"(dst), [src]"r"(src),
1431 [weight]"r"(weights), [weightd]"r"(weightd),
1432 [offset]"r"(offset), [log2_denom]"r"(log2_denom+1)
1433 : "memory"
1434 );
1435 }
1436 }
1437
/*
 * Deblocking filter across a horizontal edge, 8 pixels wide.
 *
 * Rows are addressed relative to `pix`: the code reads pix-3*stride ..
 * pix+2*stride and may rewrite pix-2*stride, pix-stride, pix and
 * pix+stride. In the usual H.264 deblocking notation these are presumably
 * p2, p1, p0 | q0, q1, q2 with the edge between p0 and q0 - naming is an
 * assumption, the row offsets are what the code shows.
 *
 * pix     first row below the edge
 * stride  byte distance between rows
 * alpha   threshold for |p0 - q0| (used as alpha - 1)
 * beta    threshold for the other differences (used as beta - 1)
 * tc0     clipping strengths; four bytes are loaded but only tc0[0] and
 *         tc0[1] survive the unpacking, each replicated over four pixels
 *
 * NOTE(review): %[alpha] and %[beta] are declared as input-only operands
 * but are written by the two "addi" instructions below - confirm this is
 * safe with the compiler's register allocation.
 */
void ff_deblock_v8_luma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta,
        int8_t *tc0)
{
    double ftmp[12];
    mips_reg addr[2];
    DECLARE_VAR_LOW32;
    DECLARE_VAR_ALL64;
    DECLARE_VAR_ADDRT;

    __asm__ volatile (
        /* addr0 = 2*stride, addr1 = pix - 3*stride; alpha/beta are
         * decremented so the "<" tests become saturating-subtract tests. */
        PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
        "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
        PTR_ADDU "%[addr1], %[stride], %[addr0] \n\t"
        "addi %[alpha], %[alpha], -0x01 \n\t"
        PTR_SUBU "%[addr1], $0, %[addr1] \n\t"
        "addi %[beta], %[beta], -0x01 \n\t"
        PTR_ADDU "%[addr1], %[addr1], %[pix] \n\t"
        /* ftmp3 = row pix (q0), ftmp1 = row pix-2*stride (p1),
         * ftmp2 = row pix-stride (p0), ftmp4 = row pix+stride (q1) */
        MMI_LDC1(%[ftmp3], %[pix], 0x00)
        MMI_LDXC1(%[ftmp1], %[addr1], %[stride], 0x00)
        MMI_LDXC1(%[ftmp2], %[addr1], %[addr0], 0x00)
        MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
        /* broadcast alpha-1 / beta-1 to every byte lane */
        "mtc1 %[alpha], %[ftmp5] \n\t"
        "mtc1 %[beta], %[ftmp6] \n\t"
        "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
        "pshufh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
        "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
        "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
        /* Each psubusb pair OR-ed together is an absolute difference;
         * subtracting the threshold with saturation leaves zero where the
         * difference is within it. ftmp8 accumulates (by OR) the three
         * tests |p0-q0| vs alpha-1, |p1-p0| vs beta-1, |q1-q0| vs beta-1. */
        "psubusb %[ftmp7], %[ftmp3], %[ftmp2] \n\t"
        "psubusb %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
        "por %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
        "psubusb %[ftmp7], %[ftmp2], %[ftmp1] \n\t"
        "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
        "psubusb %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
        "por %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
        "psubusb %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
        "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
        "por %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
        "psubusb %[ftmp5], %[ftmp4], %[ftmp3] \n\t"
        "por %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
        "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
        "por %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
        /* ftmp8 = 0xff in lanes where all three tests passed */
        "pcmpeqb %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
        /* ftmp4 = all ones (-1 per byte) for the tc0 > -1 test below */
        "pcmpeqb %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
        /* load tc0 and replicate: ftmp9 = tc0[0] x4, tc0[1] x4 */
        MMI_ULWC1(%[ftmp5], %[tc0], 0x00)
        "punpcklbh %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
        "punpcklbh %[ftmp9], %[ftmp5], %[ftmp5] \n\t"
        /* lanes with tc0 >= 0 */
        "pcmpgtb %[ftmp5], %[ftmp9], %[ftmp4] \n\t"
        /* ftmp4 = row pix-3*stride (p2) */
        MMI_LDC1(%[ftmp4], %[addr1], 0x00)
        /* ftmp10 = overall filter mask (thresholds passed and tc0 >= 0) */
        "pand %[ftmp10], %[ftmp5], %[ftmp8] \n\t"
        /* ftmp7 = mask of lanes where |p2 - p0| is within beta-1 */
        "psubusb %[ftmp8], %[ftmp4], %[ftmp2] \n\t"
        "psubusb %[ftmp7], %[ftmp2], %[ftmp4] \n\t"
        "psubusb %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
        "psubusb %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
        "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
        "pand %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
        /* ftmp5 = tc0 under the filter mask; ftmp8 = that value minus the
         * p2 mask (0 or -1), i.e. incremented where the p-side test holds;
         * ftmp7 = tc0 limited to lanes where the p-side test holds */
        "pand %[ftmp5], %[ftmp10], %[ftmp9] \n\t"
        "psubb %[ftmp8], %[ftmp5], %[ftmp7] \n\t"
        "pand %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
        /* New pix-2*stride row: average of p2 and avg(p0,q0) with a
         * low-bit correction, clamped to [p1 - ftmp7, p1 + ftmp7] */
        "pavgb %[ftmp5], %[ftmp2], %[ftmp3] \n\t"
        MMI_LDC1(%[ftmp11], %[addr1], 0x00)
        "pavgb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
        "pxor %[ftmp5], %[ftmp5], %[ftmp11] \n\t"
        "pand %[ftmp5], %[ftmp5], %[ff_pb_1] \n\t"
        "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
        "psubusb %[ftmp5], %[ftmp1], %[ftmp7] \n\t"
        "paddusb %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
        "pmaxub %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
        "pminub %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
        MMI_SDXC1(%[ftmp4], %[addr1], %[stride], 0x00)
        /* Mirror of the above for the other side: ftmp5 = row
         * pix+2*stride (q2); test |q2 - q0| against beta-1 */
        MMI_LDXC1(%[ftmp5], %[pix], %[addr0], 0x00)
        "psubusb %[ftmp4], %[ftmp5], %[ftmp3] \n\t"
        "psubusb %[ftmp7], %[ftmp3], %[ftmp5] \n\t"
        "psubusb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
        "psubusb %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
        "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
        "pand %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
        /* ftmp8 incremented again where the q-side test holds;
         * ftmp6 = tc0 limited to those lanes (beta-1 is no longer needed) */
        "psubb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
        "pand %[ftmp6], %[ftmp9], %[ftmp7] \n\t"
        /* New pix+stride row, clamped to [q1 - ftmp6, q1 + ftmp6] */
        MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
        "pavgb %[ftmp7], %[ftmp2], %[ftmp3] \n\t"
        MMI_LDXC1(%[ftmp11], %[pix], %[addr0], 0x00)
        "pavgb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
        "pxor %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
        "pand %[ftmp7], %[ftmp7], %[ff_pb_1] \n\t"
        "psubusb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
        "psubusb %[ftmp7], %[ftmp4], %[ftmp6] \n\t"
        "paddusb %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
        "pmaxub %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
        "pminub %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
        MMI_SDXC1(%[ftmp5], %[pix], %[stride], 0x00)
        /* Update of the two rows adjacent to the edge: a delta is built
         * from p1, p0, q0, q1 using byte averages and the ff_pb_3 /
         * ff_pb_A1 constants, split into a positive part (ftmp4) and a
         * negative part (ftmp7), each limited by ftmp8, then applied with
         * opposite signs to p0 (ftmp2) and q0 (ftmp3). */
        "pxor %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
        "pcmpeqb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
        "pand %[ftmp6], %[ftmp6], %[ff_pb_1] \n\t"
        "pxor %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
        "pxor %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
        "pavgb %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
        "pavgb %[ftmp4], %[ftmp4], %[ff_pb_3] \n\t"
        "pavgb %[ftmp5], %[ftmp5], %[ftmp3] \n\t"
        "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
        "paddusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
        "psubusb %[ftmp7], %[ff_pb_A1], %[ftmp4] \n\t"
        "psubusb %[ftmp4], %[ftmp4], %[ff_pb_A1] \n\t"
        "pminub %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
        "pminub %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
        "psubusb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
        "psubusb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
        "paddusb %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
        "paddusb %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
        /* store rows pix-stride and pix */
        MMI_SDXC1(%[ftmp2], %[addr1], %[addr0], 0x00)
        MMI_SDC1(%[ftmp3], %[pix], 0x00)
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
          [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
          [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
          RESTRICT_ASM_LOW32
          RESTRICT_ASM_ALL64
          RESTRICT_ASM_ADDRT
          [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1])
        : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
          [alpha]"r"((mips_reg)alpha), [beta]"r"((mips_reg)beta),
          [tc0]"r"(tc0), [ff_pb_1]"f"(ff_pb_1.f),
          [ff_pb_3]"f"(ff_pb_3.f), [ff_pb_A1]"f"(ff_pb_A1.f)
        : "memory"
    );
}
1565
deblock_v8_luma_intra_8_mmi(uint8_t * pix,ptrdiff_t stride,int alpha,int beta)1566 static void deblock_v8_luma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
1567 int beta)
1568 {
1569 DECLARE_ALIGNED(8, const uint64_t, stack[0x0a]);
1570 double ftmp[16];
1571 uint64_t tmp[1];
1572 mips_reg addr[3];
1573 DECLARE_VAR_ALL64;
1574 DECLARE_VAR_ADDRT;
1575
1576 __asm__ volatile (
1577 "ori %[tmp0], $0, 0x01 \n\t"
1578 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1579 "mtc1 %[tmp0], %[ftmp9] \n\t"
1580 PTR_SLL "%[addr0], %[stride], 0x02 \n\t"
1581 PTR_ADDU "%[addr2], %[stride], %[stride] \n\t"
1582 PTR_ADDIU "%[alpha], %[alpha], -0x01 \n\t"
1583 "sslld %[ftmp11], %[ftmp9], %[ftmp9] \n\t"
1584 "bltz %[alpha], 1f \n\t"
1585 PTR_ADDU "%[addr1], %[addr2], %[stride] \n\t"
1586 PTR_ADDIU "%[beta], %[beta], -0x01 \n\t"
1587 "bltz %[beta], 1f \n\t"
1588 PTR_SUBU "%[addr0], $0, %[addr0] \n\t"
1589 PTR_ADDU "%[addr0], %[addr0], %[pix] \n\t"
1590 MMI_LDC1(%[ftmp3], %[pix], 0x00)
1591 MMI_LDXC1(%[ftmp1], %[addr0], %[addr2], 0x00)
1592 MMI_LDXC1(%[ftmp2], %[addr0], %[addr1], 0x00)
1593 MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
1594 "mtc1 %[alpha], %[ftmp5] \n\t"
1595 "mtc1 %[beta], %[ftmp6] \n\t"
1596 "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1597 "pshufh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1598 "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1599 "psubusb %[ftmp7], %[ftmp3], %[ftmp2] \n\t"
1600 "psubusb %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
1601 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1602 "por %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1603 MMI_SDC1(%[ftmp5], %[stack], 0x10)
1604 "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1605 "psubusb %[ftmp7], %[ftmp2], %[ftmp1] \n\t"
1606 "psubusb %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
1607 "por %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1608 "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1609 "por %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1610 "psubusb %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1611 "psubusb %[ftmp5], %[ftmp4], %[ftmp3] \n\t"
1612 "por %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1613 "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1614 "por %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1615 "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1616 MMI_LDC1(%[ftmp5], %[stack], 0x10)
1617 "pcmpeqb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1618 "ldc1 %[ftmp10], %[ff_pb_1] \n\t"
1619 MMI_SDC1(%[ftmp8], %[stack], 0x20)
1620 "pavgb %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1621 "psubusb %[ftmp8], %[ftmp3], %[ftmp2] \n\t"
1622 "pavgb %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
1623 "psubusb %[ftmp7], %[ftmp2], %[ftmp3] \n\t"
1624 "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1625 "psubusb %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1626 MMI_LDC1(%[ftmp15], %[stack], 0x20)
1627 "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1628 "pand %[ftmp7], %[ftmp7], %[ftmp15] \n\t"
1629 MMI_LDXC1(%[ftmp15], %[addr0], %[stride], 0x00)
1630 "psubusb %[ftmp8], %[ftmp15], %[ftmp2] \n\t"
1631 "psubusb %[ftmp5], %[ftmp2], %[ftmp15] \n\t"
1632 "psubusb %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
1633 "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1634 "pcmpeqb %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
1635 "pand %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1636 MMI_LDXC1(%[ftmp14], %[pix], %[addr2], 0x00)
1637 MMI_SDC1(%[ftmp5], %[stack], 0x30)
1638 "psubusb %[ftmp8], %[ftmp14], %[ftmp3] \n\t"
1639 "psubusb %[ftmp5], %[ftmp3], %[ftmp14] \n\t"
1640 "psubusb %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
1641 "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1642 "pcmpeqb %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
1643 "pand %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1644 MMI_SDC1(%[ftmp5], %[stack], 0x40)
1645 "pavgb %[ftmp5], %[ftmp15], %[ftmp1] \n\t"
1646 "pavgb %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1647 "pavgb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1648 MMI_SDC1(%[ftmp6], %[stack], 0x10)
1649 "paddb %[ftmp7], %[ftmp15], %[ftmp1] \n\t"
1650 "paddb %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
1651 "paddb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1652 "mov.d %[ftmp8], %[ftmp7] \n\t"
1653 MMI_SDC1(%[ftmp7], %[stack], 0x00)
1654 "psrlh %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
1655 "pavgb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
1656 "pxor %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1657 "pand %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1658 "psubb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1659 "pavgb %[ftmp6], %[ftmp15], %[ftmp4] \n\t"
1660 "psubb %[ftmp7], %[ftmp15], %[ftmp4] \n\t"
1661 "paddb %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
1662 "psubb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1663 "pand %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1664 "psubb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1665 MMI_LDC1(%[ftmp13], %[stack], 0x10)
1666 "pavgb %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1667 "psrlh %[ftmp8], %[ftmp8], %[ftmp11] \n\t"
1668 "pavgb %[ftmp6], %[ftmp6], %[ftmp13] \n\t"
1669 "pavgb %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
1670 "pxor %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
1671 "pand %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
1672 "psubb %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1673 "pxor %[ftmp8], %[ftmp2], %[ftmp4] \n\t"
1674 "pavgb %[ftmp7], %[ftmp2], %[ftmp4] \n\t"
1675 "pand %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
1676 "psubb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1677 MMI_LDC1(%[ftmp13], %[stack], 0x30)
1678 "pavgb %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
1679 MMI_LDC1(%[ftmp12], %[stack], 0x20)
1680 "pxor %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1681 "pxor %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
1682 "pand %[ftmp6], %[ftmp6], %[ftmp13] \n\t"
1683 "pand %[ftmp7], %[ftmp7], %[ftmp12] \n\t"
1684 "pxor %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1685 "pxor %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1686 MMI_SDXC1(%[ftmp6], %[addr0], %[addr1], 0x00)
1687 MMI_LDC1(%[ftmp6], %[addr0], 0x00)
1688 "paddb %[ftmp7], %[ftmp15], %[ftmp6] \n\t"
1689 "pavgb %[ftmp6], %[ftmp6], %[ftmp15] \n\t"
1690 MMI_LDC1(%[ftmp12], %[stack], 0x00)
1691 "pavgb %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1692 "paddb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1693 "paddb %[ftmp7], %[ftmp7], %[ftmp12] \n\t"
1694 "psrlh %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
1695 "pavgb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
1696 "pxor %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1697 "pand %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1698 MMI_LDC1(%[ftmp12], %[stack], 0x30)
1699 "psubb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1700 "pxor %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
1701 "pxor %[ftmp6], %[ftmp6], %[ftmp15] \n\t"
1702 "pand %[ftmp5], %[ftmp5], %[ftmp12] \n\t"
1703 "pand %[ftmp6], %[ftmp6], %[ftmp12] \n\t"
1704 "pxor %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
1705 "pxor %[ftmp6], %[ftmp6], %[ftmp15] \n\t"
1706 MMI_SDXC1(%[ftmp5], %[addr0], %[addr2], 0x00)
1707 MMI_SDXC1(%[ftmp6], %[addr0], %[stride], 0x00)
1708 "pavgb %[ftmp5], %[ftmp14], %[ftmp4] \n\t"
1709 "pavgb %[ftmp6], %[ftmp3], %[ftmp2] \n\t"
1710 "pavgb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1711 MMI_SDC1(%[ftmp6], %[stack], 0x10)
1712 "paddb %[ftmp7], %[ftmp14], %[ftmp4] \n\t"
1713 "paddb %[ftmp8], %[ftmp3], %[ftmp2] \n\t"
1714 "paddb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1715 "mov.d %[ftmp8], %[ftmp7] \n\t"
1716 MMI_SDC1(%[ftmp7], %[stack], 0x00)
1717 "psrlh %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
1718 "pavgb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
1719 "pxor %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1720 "pand %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1721 "psubb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1722 "pavgb %[ftmp6], %[ftmp14], %[ftmp1] \n\t"
1723 "paddb %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
1724 "psubb %[ftmp7], %[ftmp14], %[ftmp1] \n\t"
1725 "psubb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1726 "pand %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1727 "psubb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1728 MMI_LDC1(%[ftmp12], %[stack], 0x10)
1729 "pavgb %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1730 "pavgb %[ftmp6], %[ftmp6], %[ftmp12] \n\t"
1731 "psrlh %[ftmp8], %[ftmp8], %[ftmp11] \n\t"
1732 "pavgb %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
1733 "pxor %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
1734 "pand %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
1735 "psubb %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1736 "pxor %[ftmp8], %[ftmp3], %[ftmp1] \n\t"
1737 "pavgb %[ftmp7], %[ftmp3], %[ftmp1] \n\t"
1738 "pand %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
1739 MMI_LDC1(%[ftmp12], %[stack], 0x40)
1740 "psubb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1741 MMI_LDC1(%[ftmp13], %[stack], 0x20)
1742 "pavgb %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
1743 "pxor %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1744 "pxor %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
1745 "pand %[ftmp6], %[ftmp6], %[ftmp12] \n\t"
1746 "pand %[ftmp7], %[ftmp7], %[ftmp13] \n\t"
1747 "pxor %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1748 "pxor %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1749 MMI_SDC1(%[ftmp6], %[pix], 0x00)
1750 MMI_LDXC1(%[ftmp6], %[pix], %[addr1], 0x00)
1751 "paddb %[ftmp7], %[ftmp14], %[ftmp6] \n\t"
1752 "pavgb %[ftmp6], %[ftmp6], %[ftmp14] \n\t"
1753 MMI_LDC1(%[ftmp12], %[stack], 0x00)
1754 "pavgb %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1755 "paddb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1756 "paddb %[ftmp7], %[ftmp7], %[ftmp12] \n\t"
1757 "psrlh %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
1758 "pavgb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
1759 "pxor %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1760 "pand %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1761 MMI_LDC1(%[ftmp12], %[stack], 0x40)
1762 "psubb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1763 "pxor %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
1764 "pxor %[ftmp6], %[ftmp6], %[ftmp14] \n\t"
1765 "pand %[ftmp5], %[ftmp5], %[ftmp12] \n\t"
1766 "pand %[ftmp6], %[ftmp6], %[ftmp12] \n\t"
1767 "pxor %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
1768 "pxor %[ftmp6], %[ftmp6], %[ftmp14] \n\t"
1769 MMI_SDXC1(%[ftmp5], %[pix], %[stride], 0x00)
1770 MMI_SDXC1(%[ftmp6], %[pix], %[addr2], 0x00)
1771 "1: \n\t"
1772 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1773 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1774 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1775 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1776 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1777 [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
1778 [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),
1779 [ftmp14]"=&f"(ftmp[14]), [ftmp15]"=&f"(ftmp[15]),
1780 [tmp0]"=&r"(tmp[0]),
1781 RESTRICT_ASM_ALL64
1782 RESTRICT_ASM_ADDRT
1783 [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
1784 [addr2]"=&r"(addr[2]),
1785 [alpha]"+&r"(alpha), [beta]"+&r"(beta)
1786 : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
1787 [stack]"r"(stack), [ff_pb_1]"m"(ff_pb_1)
1788 : "memory"
1789 );
1790 }
1791
ff_deblock_v_chroma_8_mmi(uint8_t * pix,ptrdiff_t stride,int alpha,int beta,int8_t * tc0)1792 void ff_deblock_v_chroma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
1793 int beta, int8_t *tc0)
1794 {
1795 double ftmp[9];
1796 mips_reg addr[1];
1797 DECLARE_VAR_LOW32;
1798 DECLARE_VAR_ALL64;
1799 DECLARE_VAR_ADDRT;
1800
1801 __asm__ volatile (
1802 "addi %[alpha], %[alpha], -0x01 \n\t"
1803 "addi %[beta], %[beta], -0x01 \n\t"
1804 "or %[addr0], $0, %[pix] \n\t"
1805 PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t"
1806 PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t"
1807 MMI_LDC1(%[ftmp1], %[addr0], 0x00)
1808 MMI_LDXC1(%[ftmp2], %[addr0], %[stride], 0x00)
1809 MMI_LDC1(%[ftmp3], %[pix], 0x00)
1810 MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
1811
1812 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1813 "mtc1 %[alpha], %[ftmp5] \n\t"
1814 "mtc1 %[beta], %[ftmp6] \n\t"
1815 "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1816 "pshufh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1817 "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1818 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1819 "psubusb %[ftmp7], %[ftmp3], %[ftmp2] \n\t"
1820 "psubusb %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
1821 "por %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1822 "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1823 "psubusb %[ftmp7], %[ftmp2], %[ftmp1] \n\t"
1824 "psubusb %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
1825 "por %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1826 "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1827 "por %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1828 "psubusb %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1829 "psubusb %[ftmp5], %[ftmp4], %[ftmp3] \n\t"
1830 "por %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1831 "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1832 "por %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1833 "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1834 "pcmpeqb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1835 MMI_ULWC1(%[ftmp7], %[tc0], 0x00)
1836 "punpcklbh %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1837 "pand %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1838 "pcmpeqb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1839 "pxor %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1840 "pxor %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1841 "pand %[ftmp6], %[ftmp6], %[ff_pb_1] \n\t"
1842 "pavgb %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
1843 "pxor %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
1844 "pavgb %[ftmp4], %[ftmp4], %[ff_pb_3] \n\t"
1845 "pavgb %[ftmp5], %[ftmp5], %[ftmp3] \n\t"
1846 "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1847 "paddusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1848 "psubusb %[ftmp7], %[ff_pb_A1], %[ftmp4] \n\t"
1849 "psubusb %[ftmp4], %[ftmp4], %[ff_pb_A1] \n\t"
1850 "pminub %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1851 "pminub %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
1852 "psubusb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1853 "psubusb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
1854 "paddusb %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1855 "paddusb %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1856
1857 MMI_SDXC1(%[ftmp2], %[addr0], %[stride], 0x00)
1858 MMI_SDC1(%[ftmp3], %[pix], 0x00)
1859 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1860 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1861 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1862 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1863 [ftmp8]"=&f"(ftmp[8]),
1864 RESTRICT_ASM_LOW32
1865 RESTRICT_ASM_ALL64
1866 RESTRICT_ASM_ADDRT
1867 [addr0]"=&r"(addr[0])
1868 : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
1869 [alpha]"r"(alpha), [beta]"r"(beta),
1870 [tc0]"r"(tc0), [ff_pb_1]"f"(ff_pb_1.f),
1871 [ff_pb_3]"f"(ff_pb_3.f), [ff_pb_A1]"f"(ff_pb_A1.f)
1872 : "memory"
1873 );
1874 }
1875
ff_deblock_v_chroma_intra_8_mmi(uint8_t * pix,ptrdiff_t stride,int alpha,int beta)1876 void ff_deblock_v_chroma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
1877 int beta)
1878 {
1879 double ftmp[9];
1880 mips_reg addr[1];
1881 DECLARE_VAR_ALL64;
1882 DECLARE_VAR_ADDRT;
1883
1884 __asm__ volatile (
1885 "addi %[alpha], %[alpha], -0x01 \n\t"
1886 "addi %[beta], %[beta], -0x01 \n\t"
1887 "or %[addr0], $0, %[pix] \n\t"
1888 PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t"
1889 PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t"
1890 MMI_LDC1(%[ftmp1], %[addr0], 0x00)
1891 MMI_LDXC1(%[ftmp2], %[addr0], %[stride], 0x00)
1892 MMI_LDC1(%[ftmp3], %[pix], 0x00)
1893 MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
1894
1895 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1896 "mtc1 %[alpha], %[ftmp5] \n\t"
1897 "mtc1 %[beta], %[ftmp6] \n\t"
1898 "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1899 "pshufh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1900 "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1901 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1902 "psubusb %[ftmp7], %[ftmp3], %[ftmp2] \n\t"
1903 "psubusb %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
1904 "por %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1905 "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1906 "psubusb %[ftmp7], %[ftmp2], %[ftmp1] \n\t"
1907 "psubusb %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
1908 "por %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1909 "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1910 "por %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1911 "psubusb %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1912 "psubusb %[ftmp5], %[ftmp4], %[ftmp3] \n\t"
1913 "por %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1914 "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1915 "por %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1916 "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1917 "pcmpeqb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1918 "mov.d %[ftmp6], %[ftmp2] \n\t"
1919 "mov.d %[ftmp7], %[ftmp3] \n\t"
1920 "pxor %[ftmp5], %[ftmp2], %[ftmp4] \n\t"
1921 "pand %[ftmp5], %[ftmp5], %[ff_pb_1] \n\t"
1922 "pavgb %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1923 "psubusb %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
1924 "pavgb %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
1925 "pxor %[ftmp5], %[ftmp3], %[ftmp1] \n\t"
1926 "pand %[ftmp5], %[ftmp5], %[ff_pb_1] \n\t"
1927 "pavgb %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
1928 "psubusb %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
1929 "pavgb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
1930 "psubb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1931 "psubb %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1932 "pand %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
1933 "pand %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
1934 "paddb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1935 "paddb %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1936
1937 MMI_SDXC1(%[ftmp2], %[addr0], %[stride], 0x00)
1938 MMI_SDC1(%[ftmp3], %[pix], 0x00)
1939 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1940 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1941 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1942 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1943 [ftmp8]"=&f"(ftmp[8]),
1944 RESTRICT_ASM_ALL64
1945 RESTRICT_ASM_ADDRT
1946 [addr0]"=&r"(addr[0])
1947 : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
1948 [alpha]"r"(alpha), [beta]"r"(beta),
1949 [ff_pb_1]"f"(ff_pb_1.f)
1950 : "memory"
1951 );
1952 }
1953
ff_deblock_h_chroma_8_mmi(uint8_t * pix,ptrdiff_t stride,int alpha,int beta,int8_t * tc0)1954 void ff_deblock_h_chroma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta,
1955 int8_t *tc0)
1956 {
1957 double ftmp[11];
1958 mips_reg addr[6];
1959 DECLARE_VAR_LOW32;
1960
1961 __asm__ volatile (
1962 "addi %[alpha], %[alpha], -0x01 \n\t"
1963 "addi %[beta], %[beta], -0x01 \n\t"
1964 PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
1965 PTR_ADDI "%[pix], %[pix], -0x02 \n\t"
1966 PTR_ADDU "%[addr1], %[addr0], %[stride] \n\t"
1967 PTR_ADDU "%[addr2], %[addr0], %[addr0] \n\t"
1968 "or %[addr5], $0, %[pix] \n\t"
1969 PTR_ADDU "%[pix], %[pix], %[addr1] \n\t"
1970 MMI_ULWC1(%[ftmp0], %[addr5], 0x00)
1971 PTR_ADDU "%[addr3], %[addr5], %[stride] \n\t"
1972 MMI_ULWC1(%[ftmp2], %[addr3], 0x00)
1973 PTR_ADDU "%[addr4], %[addr5], %[addr0] \n\t"
1974 MMI_ULWC1(%[ftmp1], %[addr4], 0x00)
1975 MMI_ULWC1(%[ftmp3], %[pix], 0x00)
1976 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
1977 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1978 PTR_ADDU "%[addr3], %[pix], %[stride] \n\t"
1979 "punpckhhw %[ftmp2], %[ftmp0], %[ftmp1] \n\t"
1980 "punpcklhw %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
1981 MMI_ULWC1(%[ftmp4], %[addr3], 0x00)
1982 PTR_ADDU "%[addr4], %[pix], %[addr0] \n\t"
1983 MMI_ULWC1(%[ftmp6], %[addr4], 0x00)
1984 PTR_ADDU "%[addr3], %[pix], %[addr1] \n\t"
1985 MMI_ULWC1(%[ftmp5], %[addr3], 0x00)
1986 PTR_ADDU "%[addr4], %[pix], %[addr2] \n\t"
1987 MMI_ULWC1(%[ftmp7], %[addr4], 0x00)
1988 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1989 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1990 "mov.d %[ftmp6], %[ftmp4] \n\t"
1991 "punpcklhw %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1992 "punpckhhw %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1993 "punpckhwd %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
1994 "punpckhwd %[ftmp3], %[ftmp2], %[ftmp6] \n\t"
1995 "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
1996 "punpcklwd %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1997 "mov.d %[ftmp9], %[ftmp0] \n\t"
1998 "mov.d %[ftmp10], %[ftmp3] \n\t"
1999
2000 "pxor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
2001 "mtc1 %[alpha], %[ftmp4] \n\t"
2002 "mtc1 %[beta], %[ftmp5] \n\t"
2003 "pshufh %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
2004 "pshufh %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
2005 "packushb %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
2006 "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2007 "psubusb %[ftmp6], %[ftmp2], %[ftmp1] \n\t"
2008 "psubusb %[ftmp7], %[ftmp1], %[ftmp2] \n\t"
2009 "por %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
2010 "psubusb %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
2011 "psubusb %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
2012 "psubusb %[ftmp4], %[ftmp0], %[ftmp1] \n\t"
2013 "por %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2014 "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2015 "por %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
2016 "psubusb %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
2017 "psubusb %[ftmp4], %[ftmp3], %[ftmp2] \n\t"
2018 "por %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2019 "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2020 "por %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
2021 "pxor %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
2022 "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
2023 MMI_ULWC1(%[ftmp6], %[tc0], 0x00)
2024 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
2025 "pand %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
2026 "pcmpeqb %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
2027 "pxor %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
2028 "pxor %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
2029 "pand %[ftmp5], %[ftmp5], %[ff_pb_1] \n\t"
2030 "pavgb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
2031 "pxor %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
2032 "pavgb %[ftmp3], %[ftmp3], %[ff_pb_3] \n\t"
2033 "pavgb %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
2034 "pavgb %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
2035 "paddusb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
2036 "psubusb %[ftmp6], %[ff_pb_A1], %[ftmp3] \n\t"
2037 "psubusb %[ftmp3], %[ftmp3], %[ff_pb_A1] \n\t"
2038 "pminub %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
2039 "pminub %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
2040 "psubusb %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
2041 "psubusb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2042 "paddusb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
2043 "paddusb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
2044
2045 "punpckhwd %[ftmp4], %[ftmp9], %[ftmp9] \n\t"
2046 "punpckhwd %[ftmp5], %[ftmp1], %[ftmp1] \n\t"
2047 "punpckhwd %[ftmp6], %[ftmp2], %[ftmp2] \n\t"
2048 "punpcklbh %[ftmp0], %[ftmp9], %[ftmp1] \n\t"
2049 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
2050 "punpcklhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2051 "punpckhhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2052 MMI_USWC1(%[ftmp1], %[addr5], 0x00)
2053 PTR_ADDU "%[addr3], %[addr5], %[stride] \n\t"
2054 "punpckhwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
2055 MMI_USWC1(%[ftmp1], %[addr3], 0x00)
2056 PTR_ADDU "%[addr4], %[addr5], %[addr0] \n\t"
2057 MMI_USWC1(%[ftmp0], %[addr4], 0x00)
2058 "punpckhwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2059 "punpckhwd %[ftmp3], %[ftmp10], %[ftmp10] \n\t"
2060 MMI_USWC1(%[ftmp0], %[pix], 0x00)
2061 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2062 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
2063 PTR_ADDU "%[addr3], %[pix], %[stride] \n\t"
2064 "punpcklhw %[ftmp5], %[ftmp4], %[ftmp6] \n\t"
2065 "punpckhhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2066 MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2067 "punpckhwd %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2068 PTR_ADDU "%[addr3], %[pix], %[addr0] \n\t"
2069 PTR_ADDU "%[addr4], %[pix], %[addr1] \n\t"
2070 MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2071 MMI_USWC1(%[ftmp4], %[addr4], 0x00)
2072 PTR_ADDU "%[addr3], %[pix], %[addr2] \n\t"
2073 "punpckhwd %[ftmp9], %[ftmp4], %[ftmp4] \n\t"
2074 MMI_USWC1(%[ftmp9], %[addr3], 0x00)
2075 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2076 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2077 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2078 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2079 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
2080 [ftmp10]"=&f"(ftmp[10]),
2081 RESTRICT_ASM_LOW32
2082 [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
2083 [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
2084 [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
2085 [pix]"+&r"(pix)
2086 : [alpha]"r"(alpha), [beta]"r"(beta),
2087 [stride]"r"((mips_reg)stride), [tc0]"r"(tc0),
2088 [ff_pb_1]"f"(ff_pb_1.f), [ff_pb_3]"f"(ff_pb_3.f),
2089 [ff_pb_A1]"f"(ff_pb_A1.f)
2090 : "memory"
2091 );
2092 }
2093
ff_deblock_h_chroma_intra_8_mmi(uint8_t * pix,ptrdiff_t stride,int alpha,int beta)2094 void ff_deblock_h_chroma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
2095 int beta)
2096 {
2097 double ftmp[11];
2098 mips_reg addr[6];
2099 DECLARE_VAR_LOW32;
2100
2101 __asm__ volatile (
2102 "addi %[alpha], %[alpha], -0x01 \n\t"
2103 "addi %[beta], %[beta], -0x01 \n\t"
2104 PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
2105 PTR_ADDI "%[pix], %[pix], -0x02 \n\t"
2106 PTR_ADDU "%[addr1], %[addr0], %[stride] \n\t"
2107 PTR_ADDU "%[addr2], %[addr0], %[addr0] \n\t"
2108 "or %[addr5], $0, %[pix] \n\t"
2109 PTR_ADDU "%[pix], %[pix], %[addr1] \n\t"
2110 MMI_ULWC1(%[ftmp0], %[addr5], 0x00)
2111 PTR_ADDU "%[addr3], %[addr5], %[stride] \n\t"
2112 MMI_ULWC1(%[ftmp2], %[addr3], 0x00)
2113 PTR_ADDU "%[addr4], %[addr5], %[addr0] \n\t"
2114 MMI_ULWC1(%[ftmp1], %[addr4], 0x00)
2115 MMI_ULWC1(%[ftmp3], %[pix], 0x00)
2116 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2117 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
2118 PTR_ADDU "%[addr3], %[pix], %[stride] \n\t"
2119 "punpckhhw %[ftmp2], %[ftmp0], %[ftmp1] \n\t"
2120 "punpcklhw %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2121 MMI_ULWC1(%[ftmp4], %[addr3], 0x00)
2122 PTR_ADDU "%[addr4], %[pix], %[addr0] \n\t"
2123 MMI_ULWC1(%[ftmp6], %[addr4], 0x00)
2124 PTR_ADDU "%[addr3], %[pix], %[addr1] \n\t"
2125 MMI_ULWC1(%[ftmp5], %[addr3], 0x00)
2126 PTR_ADDU "%[addr4], %[pix], %[addr2] \n\t"
2127 MMI_ULWC1(%[ftmp7], %[addr4], 0x00)
2128 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2129 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
2130 "mov.d %[ftmp6], %[ftmp4] \n\t"
2131 "punpcklhw %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2132 "punpckhhw %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
2133 "punpckhwd %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
2134 "punpckhwd %[ftmp3], %[ftmp2], %[ftmp6] \n\t"
2135 "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2136 "punpcklwd %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
2137
2138 "pxor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
2139 "mtc1 %[alpha], %[ftmp4] \n\t"
2140 "mtc1 %[beta], %[ftmp5] \n\t"
2141 "pshufh %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
2142 "pshufh %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
2143 "packushb %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
2144 "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2145 "psubusb %[ftmp6], %[ftmp2], %[ftmp1] \n\t"
2146 "psubusb %[ftmp7], %[ftmp1], %[ftmp2] \n\t"
2147 "por %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
2148 "psubusb %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
2149 "psubusb %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
2150 "psubusb %[ftmp4], %[ftmp0], %[ftmp1] \n\t"
2151 "por %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2152 "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2153 "por %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
2154 "psubusb %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
2155 "psubusb %[ftmp4], %[ftmp3], %[ftmp2] \n\t"
2156 "por %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2157 "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2158 "por %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
2159 "pxor %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
2160 "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
2161 "mov.d %[ftmp5], %[ftmp1] \n\t"
2162 "mov.d %[ftmp6], %[ftmp2] \n\t"
2163 "pxor %[ftmp4], %[ftmp1], %[ftmp3] \n\t"
2164 "pand %[ftmp4], %[ftmp4], %[ff_pb_1] \n\t"
2165 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
2166 "psubusb %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
2167 "pavgb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
2168 "pxor %[ftmp4], %[ftmp2], %[ftmp0] \n\t"
2169 "pand %[ftmp4], %[ftmp4], %[ff_pb_1] \n\t"
2170 "pavgb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
2171 "psubusb %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
2172 "pavgb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2173 "psubb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
2174 "psubb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
2175 "pand %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
2176 "pand %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
2177 "paddb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
2178 "paddb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
2179
2180 "punpckhwd %[ftmp4], %[ftmp0], %[ftmp0] \n\t"
2181 "punpckhwd %[ftmp5], %[ftmp1], %[ftmp1] \n\t"
2182 "punpckhwd %[ftmp6], %[ftmp2], %[ftmp2] \n\t"
2183 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2184 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2185 "punpcklhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2186 "punpckhhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2187 MMI_USWC1(%[ftmp1], %[addr5], 0x00)
2188 PTR_ADDU "%[addr3], %[addr5], %[stride] \n\t"
2189 "punpckhwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
2190 PTR_ADDU "%[addr4], %[addr5], %[addr0] \n\t"
2191 MMI_USWC1(%[ftmp1], %[addr3], 0x00)
2192 MMI_USWC1(%[ftmp0], %[addr4], 0x00)
2193 "punpckhwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2194 "punpckhwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
2195 MMI_USWC1(%[ftmp0], %[pix], 0x00)
2196 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2197 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
2198 PTR_ADDU "%[addr3], %[pix], %[stride] \n\t"
2199 "punpcklhw %[ftmp5], %[ftmp4], %[ftmp6] \n\t"
2200 "punpckhhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2201 MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2202 "punpckhwd %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2203 PTR_ADDU "%[addr3], %[pix], %[addr0] \n\t"
2204 PTR_ADDU "%[addr4], %[pix], %[addr1] \n\t"
2205 MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2206 PTR_ADDU "%[addr3], %[pix], %[addr2] \n\t"
2207 MMI_USWC1(%[ftmp4], %[addr4], 0x00)
2208 "punpckhwd %[ftmp9], %[ftmp4], %[ftmp4] \n\t"
2209 MMI_USWC1(%[ftmp9], %[addr3], 0x00)
2210 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2211 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2212 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2213 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2214 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
2215 [ftmp10]"=&f"(ftmp[10]),
2216 RESTRICT_ASM_LOW32
2217 [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
2218 [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
2219 [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
2220 [pix]"+&r"(pix)
2221 : [alpha]"r"(alpha), [beta]"r"(beta),
2222 [stride]"r"((mips_reg)stride), [ff_pb_1]"f"(ff_pb_1.f)
2223 : "memory"
2224 );
2225 }
2226
/**
 * Luma deblocking of a 16-pixel-wide horizontal edge, handled as two
 * independent 8-pixel halves by ff_deblock_v8_luma_8_mmi().
 *
 * The left half uses tc0[0..1], the right half (pix + 8) uses tc0[2..3].
 * A half is skipped when both of its tc0 entries are negative.
 */
void ff_deblock_v_luma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta,
                             int8_t *tc0)
{
    int half;

    for (half = 0; half < 2; half++) {
        int8_t *tc = tc0 + 2 * half;

        /* The bitwise AND of two values is negative only if both are. */
        if ((tc[0] & tc[1]) < 0)
            continue;

        ff_deblock_v8_luma_8_mmi(pix + 8 * half, stride, alpha, beta, tc);
    }
}
2235
/**
 * Luma intra deblocking of a 16-pixel-wide horizontal edge: the 8-pixel
 * kernel deblock_v8_luma_intra_8_mmi() is run on the left half (pix) and
 * then on the right half (pix + 8) with the same stride and thresholds.
 */
void ff_deblock_v_luma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
                                   int beta)
{
    int x;

    for (x = 0; x < 16; x += 8)
        deblock_v8_luma_intra_8_mmi(pix + x, stride, alpha, beta);
}
2242
/*
 * Luma deblocking entry point that works by transposition.
 *
 * Steps, as visible in the body:
 *   1. First asm: read 16 rows (row k at pix - 4 + k * stride), transpose
 *      them in two groups of 8 rows, and keep the six byte columns at
 *      pix - 3 .. pix + 2 in the local scratch buffer stack. The slot at byte
 *      offset 0x10 * j holds column pix - 3 + j for rows 0..7; the slot at
 *      0x10 * j + 8 holds the same column for rows 8..15.
 *   2. Call ff_deblock_v_luma_8_mmi on the scratch buffer with a pitch of
 *      0x10. &stack[6] is byte offset 0x30, the slot of the column at pix + 0.
 *   3. Second asm: read back the four columns pix - 2 .. pix + 1 (scratch
 *      offsets 0x10 .. 0x40), transpose them, and write 4 bytes per row at
 *      pix - 2 + k * stride for k = 0..15.
 *
 * pix    - pointer into the picture; rows pix + k * stride, k = 0..15, are
 *          read and partially rewritten.
 * stride - distance in bytes between consecutive rows.
 * alpha, beta, tc0 - forwarded unchanged to ff_deblock_v_luma_8_mmi.
 *
 * NOTE(review): the MMI_* load/store macros come from mmiutils.h, which is
 * not visible here. The comments assume LDC1/SDC1/ULDC1 move 64 bits and
 * USWC1 moves the low 32 bits - confirm against the header.
 * NOTE(review): stack is declared const, yet the SDC1 stores below write to
 * it and it is passed to the callee through a cast to uint8_t *; consider
 * whether the const qualifier should be dropped.
 */
ff_deblock_h_luma_8_mmi(uint8_t * pix,ptrdiff_t stride,int alpha,int beta,int8_t * tc0)2243 void ff_deblock_h_luma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta,
2244 int8_t *tc0)
2245 {
/* 13 * 8 = 0x68 bytes of scratch; the highest store below is at 0x58. */
2246 DECLARE_ALIGNED(8, const uint64_t, stack[0x0d]);
2247 double ftmp[9];
2248 mips_reg addr[8];
2249 DECLARE_VAR_LOW32;
2250 DECLARE_VAR_ALL64;
2251
2252 __asm__ volatile (
/* addr0 = 2 * stride, addr1 = pix - 4, addr2 = 3 * stride */
2253 PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
2254 PTR_ADDI "%[addr1], %[pix], -0x4 \n\t"
2255 PTR_ADDU "%[addr2], %[stride], %[addr0] \n\t"
/* Load rows 0..6 into ftmp0..ftmp6; addr4 = addr1 + 3 * stride (row 3). */
2256 MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
2257 PTR_ADDU "%[addr3], %[addr1], %[stride] \n\t"
2258 PTR_ADDU "%[addr4], %[addr1], %[addr2] \n\t"
2259 MMI_ULDC1(%[ftmp1], %[addr3], 0x00)
2260 PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2261 MMI_ULDC1(%[ftmp2], %[addr5], 0x00)
2262 MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
2263 PTR_ADDU "%[addr3], %[addr4], %[stride] \n\t"
2264 MMI_ULDC1(%[ftmp4], %[addr3], 0x00)
2265 PTR_ADDU "%[addr5], %[addr4], %[addr0] \n\t"
2266 MMI_ULDC1(%[ftmp5], %[addr5], 0x00)
2267 PTR_ADDU "%[addr3], %[addr4], %[addr2] \n\t"
2268 MMI_ULDC1(%[ftmp6], %[addr3], 0x00)
/* addr6 = 4 * stride */
2269 PTR_ADDU "%[addr6], %[addr0], %[addr0] \n\t"
/* Byte interleave of row pairs (0,1), (2,3), (4,5). */
2270 "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2271 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2272 "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2273 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2274 "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2275 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2276 PTR_ADDU "%[addr3], %[addr4], %[addr6] \n\t"
/* Spill the high interleave of rows (2,3) to scratch 0x10 so ftmp1 can be
   reused; it is reloaded into ftmp8 further down. Then load row 7. */
2277 MMI_SDC1(%[ftmp1], %[stack], 0x10)
2278 MMI_ULDC1(%[ftmp8], %[addr3], 0x00)
/* addr7 = 8 * stride */
2279 PTR_ADDU "%[addr7], %[addr6], %[addr6] \n\t"
2280 "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2281 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
/* Halfword interleave of the low halves (source bytes 0..3 of each row). */
2282 "punpckhhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2283 "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2284 "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2285 "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2286 MMI_LDC1(%[ftmp8], %[stack], 0x10)
/* Only the high word pair is kept here: that is source byte 1 (pix - 3) of
   rows 0..7. Source byte 0 (pix - 4) is not stored. */
2287 "punpckhwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2288 MMI_SDC1(%[ftmp0], %[stack], 0x00)
/* Halfword interleave of the high halves (source bytes 4..7 of each row). */
2289 "punpckhhw %[ftmp6], %[ftmp7], %[ftmp8] \n\t"
2290 "punpcklhw %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
2291 "punpckhhw %[ftmp0], %[ftmp3], %[ftmp5] \n\t"
2292 "punpcklhw %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
/* Only the low word pair is kept for ftmp6: source byte 6 (pix + 2).
   Source byte 7 (pix + 3) is not stored. */
2293 "punpcklwd %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
2294 "punpckhwd %[ftmp5], %[ftmp7], %[ftmp3] \n\t"
2295 "punpcklwd %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
2296 "punpckhwd %[ftmp3], %[ftmp1], %[ftmp2] \n\t"
2297 "punpcklwd %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
/* Store columns pix - 2, pix - 1, pix + 0, pix + 1, pix + 2 for rows 0..7. */
2298 MMI_SDC1(%[ftmp1], %[stack], 0x10)
2299 MMI_SDC1(%[ftmp3], %[stack], 0x20)
2300 MMI_SDC1(%[ftmp7], %[stack], 0x30)
2301 MMI_SDC1(%[ftmp5], %[stack], 0x40)
2302 MMI_SDC1(%[ftmp6], %[stack], 0x50)
/* Advance both row bases by 8 rows and repeat the same sequence for rows
   8..15, writing to the upper 8 bytes (offset + 0x08) of each scratch slot. */
2303 PTR_ADDU "%[addr1], %[addr1], %[addr7] \n\t"
2304 PTR_ADDU "%[addr4], %[addr4], %[addr7] \n\t"
2305 MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
2306 PTR_ADDU "%[addr3], %[addr1], %[stride] \n\t"
2307 MMI_ULDC1(%[ftmp1], %[addr3], 0x00)
2308 PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2309 MMI_ULDC1(%[ftmp2], %[addr5], 0x00)
2310 MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
2311 PTR_ADDU "%[addr3], %[addr4], %[stride] \n\t"
2312 MMI_ULDC1(%[ftmp4], %[addr3], 0x00)
2313 PTR_ADDU "%[addr5], %[addr4], %[addr0] \n\t"
2314 MMI_ULDC1(%[ftmp5], %[addr5], 0x00)
2315 PTR_ADDU "%[addr3], %[addr4], %[addr2] \n\t"
2316 MMI_ULDC1(%[ftmp6], %[addr3], 0x00)
2317 "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2318 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2319 "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2320 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2321 "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2322 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2323 PTR_ADDU "%[addr3], %[addr4], %[addr6] \n\t"
/* Same temporary spill as above, this time through scratch 0x18. */
2324 MMI_SDC1(%[ftmp1], %[stack], 0x18)
2325 MMI_ULDC1(%[ftmp8], %[addr3], 0x00)
2326 "punpckhhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2327 "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2328 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
2329 "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2330 "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2331 "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2332 "punpckhwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2333 MMI_LDC1(%[ftmp8], %[stack], 0x18)
2334 MMI_SDC1(%[ftmp0], %[stack], 0x08)
2335 "punpckhhw %[ftmp6], %[ftmp7], %[ftmp8] \n\t"
2336 "punpcklhw %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
2337 "punpckhhw %[ftmp0], %[ftmp3], %[ftmp5] \n\t"
2338 "punpcklhw %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
2339 "punpckhwd %[ftmp5], %[ftmp7], %[ftmp3] \n\t"
2340 "punpcklwd %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
2341 "punpckhwd %[ftmp3], %[ftmp1], %[ftmp2] \n\t"
2342 "punpcklwd %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
2343 "punpcklwd %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
2344 MMI_SDC1(%[ftmp1], %[stack], 0x18)
2345 MMI_SDC1(%[ftmp3], %[stack], 0x28)
2346 MMI_SDC1(%[ftmp7], %[stack], 0x38)
2347 MMI_SDC1(%[ftmp5], %[stack], 0x48)
2348 MMI_SDC1(%[ftmp6], %[stack], 0x58)
2349 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2350 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2351 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2352 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2353 [ftmp8]"=&f"(ftmp[8]),
2354 RESTRICT_ASM_ALL64
2355 [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
2356 [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
2357 [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
2358 [addr6]"=&r"(addr[6]), [addr7]"=&r"(addr[7])
2359 : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
2360 [stack]"r"(stack)
2361 : "memory"
2362 );
2363
/* Run the vertical filter on the transposed copy: the pointer is the scratch
   slot for the column at pix + 0 and the pitch is one slot pair (0x10). */
2364 ff_deblock_v_luma_8_mmi((uint8_t *) &stack[6], 0x10, alpha, beta, tc0);
2365
/* Transpose the four columns pix - 2 .. pix + 1 back and write them out.
   NOTE(review): ftmp7 and ftmp8 are listed as outputs below but are not
   referenced in this asm body. */
2366 __asm__ volatile (
/* addr0 = 2 * stride, addr1 = pix - 2, addr6 = 4 * stride, addr2 = 3 * stride,
   addr7 = 8 * stride, addr4 = addr1 + 3 * stride (row 3). */
2367 PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
2368 PTR_ADDI "%[addr1], %[pix], -0x02 \n\t"
2369 PTR_ADDU "%[addr6], %[addr0], %[addr0] \n\t"
2370 PTR_ADDU "%[addr2], %[addr0], %[stride] \n\t"
2371 PTR_ADDU "%[addr7], %[addr6], %[addr6] \n\t"
2372 PTR_ADDU "%[addr4], %[addr1], %[addr2] \n\t"
/* Rows 0..7 of the four columns. */
2373 MMI_LDC1(%[ftmp0], %[stack], 0x10)
2374 MMI_LDC1(%[ftmp1], %[stack], 0x20)
2375 MMI_LDC1(%[ftmp2], %[stack], 0x30)
2376 MMI_LDC1(%[ftmp3], %[stack], 0x40)
/* Copy the upper words (rows 4..7) of the first three columns aside before
   their registers are overwritten by the interleaves. */
2377 "punpckhwd %[ftmp4], %[ftmp0], %[ftmp0] \n\t"
2378 "punpckhwd %[ftmp5], %[ftmp1], %[ftmp1] \n\t"
2379 "punpckhwd %[ftmp6], %[ftmp2], %[ftmp2] \n\t"
/* Rows 0..3: after these interleaves ftmp1 holds rows 0 and 1, ftmp0 holds
   rows 2 and 3, four bytes each. */
2380 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2381 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2382 "punpcklhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2383 "punpckhhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2384 MMI_USWC1(%[ftmp1], %[addr1], 0x00)
2385 PTR_ADDU "%[addr3], %[addr1], %[stride] \n\t"
2386 "punpckhwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
2387 PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2388 MMI_USWC1(%[ftmp1], %[addr3], 0x00)
2389 MMI_USWC1(%[ftmp0], %[addr5], 0x00)
2390 "punpckhwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2391 "punpckhwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
2392 MMI_USWC1(%[ftmp0], %[addr4], 0x00)
/* Rows 4..7, built from the upper words saved above. */
2393 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2394 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
2395 "punpcklhw %[ftmp5], %[ftmp4], %[ftmp6] \n\t"
2396 PTR_ADDU "%[addr3], %[addr4], %[stride] \n\t"
2397 "punpckhhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2398 MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2399 PTR_ADDU "%[addr3], %[addr4], %[addr0] \n\t"
2400 "punpckhwd %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2401 PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2402 MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2403 MMI_USWC1(%[ftmp4], %[addr5], 0x00)
2404 PTR_ADDU "%[addr3], %[addr4], %[addr6] \n\t"
2405 "punpckhwd %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
/* Advance both row bases by 8 rows for the second half. */
2406 PTR_ADDU "%[addr1], %[addr1], %[addr7] \n\t"
2407 MMI_USWC1(%[ftmp4], %[addr3], 0x00)
2408 PTR_ADDU "%[addr4], %[addr4], %[addr7] \n\t"
/* Rows 8..15 of the same four columns (upper 8 bytes of each slot). */
2409 MMI_LDC1(%[ftmp0], %[stack], 0x18)
2410 MMI_LDC1(%[ftmp1], %[stack], 0x28)
2411 MMI_LDC1(%[ftmp2], %[stack], 0x38)
2412 MMI_LDC1(%[ftmp3], %[stack], 0x48)
/* addr0 and addr6 are recomputed here with the same values they already
   hold (2 * stride and 4 * stride). */
2413 PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
2414 "punpckhwd %[ftmp4], %[ftmp0], %[ftmp0] \n\t"
2415 PTR_ADDU "%[addr6], %[addr0], %[addr0] \n\t"
2416 "punpckhwd %[ftmp5], %[ftmp1], %[ftmp1] \n\t"
2417 "punpckhwd %[ftmp6], %[ftmp2], %[ftmp2] \n\t"
2418 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2419 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2420 PTR_ADDU "%[addr3], %[addr1], %[stride] \n\t"
2421 "punpcklhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2422 "punpckhhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2423 MMI_USWC1(%[ftmp1], %[addr1], 0x00)
2424 "punpckhwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
2425 PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2426 MMI_USWC1(%[ftmp1], %[addr3], 0x00)
2427 MMI_USWC1(%[ftmp0], %[addr5], 0x00)
2428 "punpckhwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2429 "punpckhwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
2430 MMI_USWC1(%[ftmp0], %[addr4], 0x00)
2431 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2432 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
2433 PTR_ADDU "%[addr3], %[addr4], %[stride] \n\t"
2434 "punpcklhw %[ftmp5], %[ftmp4], %[ftmp6] \n\t"
2435 "punpckhhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2436 MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2437 PTR_ADDU "%[addr3], %[addr4], %[addr0] \n\t"
2438 "punpckhwd %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2439 PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2440 MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2441 MMI_USWC1(%[ftmp4], %[addr5], 0x00)
2442 PTR_ADDU "%[addr3], %[addr4], %[addr6] \n\t"
2443 "punpckhwd %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
2444 MMI_USWC1(%[ftmp4], %[addr3], 0x00)
2445 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2446 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2447 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2448 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2449 [ftmp8]"=&f"(ftmp[8]),
2450 RESTRICT_ASM_LOW32
2451 RESTRICT_ASM_ALL64
2452 [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
2453 [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
2454 [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
2455 [addr6]"=&r"(addr[6]), [addr7]"=&r"(addr[7])
2456 : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
2457 [stack]"r"(stack)
2458 : "memory"
2459 );
2460 }
2461
/*
 * Intra luma deblocking entry point that works by transposition.
 *
 * Steps, as visible in the body:
 *   1. First asm: read 16 rows (row k at pix - 4 + k * stride), do a full
 *      8x8 byte transpose on rows 0..7 and then on rows 8..15, and store all
 *      eight columns in ptmp. The slot at byte offset 0x10 * j holds column
 *      pix - 4 + j for rows 0..7; the slot at 0x10 * j + 8 holds the same
 *      column for rows 8..15. Four address values are then saved in pdat.
 *   2. Call ff_deblock_v_luma_intra_8_mmi on ptmp with a pitch of 0x10.
 *      &ptmp[8] is byte offset 0x40, the slot of the column at pix + 0.
 *   3. Second asm: reload the saved addresses, transpose ptmp back and write
 *      8 bytes per row, rows 8..15 first and rows 0..7 second.
 *
 * pix    - pointer into the picture; rows pix + k * stride, k = 0..15, are
 *          read and rewritten over the 8 bytes starting at pix - 4.
 * stride - distance in bytes between consecutive rows.
 * alpha, beta - forwarded unchanged to ff_deblock_v_luma_intra_8_mmi.
 *
 * NOTE(review): the MMI_* and PTR_* macros are defined outside this view.
 * The comments assume LDC1/SDC1/ULDC1/USDC1 move 64 bits and PTR_S/PTR_L
 * store/load one pointer-sized register - confirm against the headers.
 * NOTE(review): ptmp and pdat are declared const, yet both are written by
 * the first asm statement and ptmp is passed to the callee through a cast;
 * consider whether the const qualifier should be dropped.
 */
ff_deblock_h_luma_intra_8_mmi(uint8_t * pix,ptrdiff_t stride,int alpha,int beta)2462 void ff_deblock_h_luma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
2463 int beta)
2464 {
/* ptmp: 17 * 8 = 0x88 bytes of transposed pixels (highest store at 0x78).
   pdat: four slots used to carry address values between the two asm
   statements. */
2465 DECLARE_ALIGNED(8, const uint64_t, ptmp[0x11]);
2466 DECLARE_ALIGNED(8, const uint64_t, pdat[0x04]);
2467 double ftmp[9];
2468 mips_reg addr[7];
2469 DECLARE_VAR_ALL64;
2470
2471 __asm__ volatile (
/* addr0 = 2 * stride, addr1 = pix - 4, addr2 = 3 * stride, addr3 = 4 * stride,
   addr4 = addr1 + 3 * stride (row 3). */
2472 PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
2473 PTR_ADDI "%[addr1], %[pix], -0x04 \n\t"
2474 PTR_ADDU "%[addr2], %[addr0], %[stride] \n\t"
2475 PTR_ADDU "%[addr3], %[addr0], %[addr0] \n\t"
2476 PTR_ADDU "%[addr4], %[addr1], %[addr2] \n\t"
/* Load rows 0..6 into ftmp0..ftmp6. */
2477 PTR_ADDU "%[addr5], %[addr1], %[stride] \n\t"
2478 MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
2479 PTR_ADDU "%[addr6], %[addr1], %[addr0] \n\t"
2480 MMI_ULDC1(%[ftmp1], %[addr5], 0x00)
2481 MMI_ULDC1(%[ftmp2], %[addr6], 0x00)
2482 PTR_ADDU "%[addr5], %[addr4], %[stride] \n\t"
2483 MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
2484 PTR_ADDU "%[addr6], %[addr4], %[addr0] \n\t"
2485 MMI_ULDC1(%[ftmp4], %[addr5], 0x00)
2486 PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2487 MMI_ULDC1(%[ftmp5], %[addr6], 0x00)
2488 MMI_ULDC1(%[ftmp6], %[addr5], 0x00)
2489 PTR_ADDU "%[addr5], %[addr4], %[addr3] \n\t"
/* Byte interleave of row pairs (0,1), (2,3), (4,5), then load row 7 and
   interleave (6,7). */
2490 "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2491 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2492 "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2493 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2494 "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2495 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2496 MMI_ULDC1(%[ftmp8], %[addr5], 0x00)
2497 "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2498 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
/* ptmp + 0x00 and ptmp + 0x20 serve as temporary spill slots during the
   transpose; both are overwritten with final column data afterwards. */
2499 MMI_SDC1(%[ftmp3], %[ptmp], 0x00)
2500 "punpckhhw %[ftmp3], %[ftmp0], %[ftmp2] \n\t"
2501 "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2502 "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2503 "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2504 "punpckhhw %[ftmp6], %[ftmp7], %[ftmp1] \n\t"
2505 "punpcklhw %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
2506 MMI_SDC1(%[ftmp2], %[ptmp], 0x20)
2507 MMI_LDC1(%[ftmp2], %[ptmp], 0x00)
2508 "punpckhhw %[ftmp1], %[ftmp2], %[ftmp5] \n\t"
2509 "punpcklhw %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
2510 "punpckhwd %[ftmp5], %[ftmp0], %[ftmp4] \n\t"
2511 "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2512 "punpckhwd %[ftmp4], %[ftmp7], %[ftmp2] \n\t"
2513 "punpcklwd %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
/* Columns 0, 1, 4, 5 (counted from pix - 4) for rows 0..7. */
2514 MMI_SDC1(%[ftmp0], %[ptmp], 0x00)
2515 MMI_SDC1(%[ftmp5], %[ptmp], 0x10)
2516 MMI_SDC1(%[ftmp7], %[ptmp], 0x40)
2517 MMI_SDC1(%[ftmp4], %[ptmp], 0x50)
2518 MMI_LDC1(%[ftmp8], %[ptmp], 0x20)
2519 "punpckhwd %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
2520 "punpcklwd %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
2521 "punpckhwd %[ftmp5], %[ftmp6], %[ftmp1] \n\t"
2522 "punpcklwd %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
/* addr5 = 8 * stride */
2523 PTR_ADDU "%[addr5], %[addr3], %[addr3] \n\t"
/* Columns 2, 3, 6, 7 for rows 0..7. */
2524 MMI_SDC1(%[ftmp3], %[ptmp], 0x20)
2525 MMI_SDC1(%[ftmp0], %[ptmp], 0x30)
2526 MMI_SDC1(%[ftmp6], %[ptmp], 0x60)
2527 MMI_SDC1(%[ftmp5], %[ptmp], 0x70)
/* Advance both row bases by 8 rows and repeat the same sequence for rows
   8..15, using the upper 8 bytes (offset + 0x08) of each ptmp slot. */
2528 PTR_ADDU "%[addr1], %[addr1], %[addr5] \n\t"
2529 PTR_ADDU "%[addr4], %[addr4], %[addr5] \n\t"
2530 PTR_ADDU "%[addr5], %[addr1], %[stride] \n\t"
2531 MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
2532 PTR_ADDU "%[addr6], %[addr1], %[addr0] \n\t"
2533 MMI_ULDC1(%[ftmp1], %[addr5], 0x00)
2534 MMI_ULDC1(%[ftmp2], %[addr6], 0x00)
2535 PTR_ADDU "%[addr5], %[addr4], %[stride] \n\t"
2536 MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
2537 PTR_ADDU "%[addr6], %[addr4], %[addr0] \n\t"
2538 MMI_ULDC1(%[ftmp4], %[addr5], 0x00)
2539 PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2540 MMI_ULDC1(%[ftmp5], %[addr6], 0x00)
2541 MMI_ULDC1(%[ftmp6], %[addr5], 0x00)
2542 PTR_ADDU "%[addr5], %[addr4], %[addr3] \n\t"
2543 "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2544 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2545 "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2546 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2547 "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2548 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2549 MMI_ULDC1(%[ftmp8], %[addr5], 0x00)
2550 "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2551 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
/* Temporary spills through ptmp + 0x08 and ptmp + 0x28 this time. */
2552 MMI_SDC1(%[ftmp3], %[ptmp], 0x08)
2553 "punpckhhw %[ftmp3], %[ftmp0], %[ftmp2] \n\t"
2554 "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2555 "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2556 "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2557 "punpckhhw %[ftmp6], %[ftmp7], %[ftmp1] \n\t"
2558 "punpcklhw %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
2559 MMI_SDC1(%[ftmp2], %[ptmp], 0x28)
2560 MMI_LDC1(%[ftmp2], %[ptmp], 0x08)
2561 "punpckhhw %[ftmp1], %[ftmp2], %[ftmp5] \n\t"
2562 "punpcklhw %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
2563 "punpckhwd %[ftmp5], %[ftmp0], %[ftmp4] \n\t"
2564 "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2565 "punpckhwd %[ftmp4], %[ftmp7], %[ftmp2] \n\t"
2566 "punpcklwd %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
2567 MMI_SDC1(%[ftmp0], %[ptmp], 0x08)
2568 MMI_SDC1(%[ftmp5], %[ptmp], 0x18)
2569 MMI_SDC1(%[ftmp7], %[ptmp], 0x48)
2570 MMI_SDC1(%[ftmp4], %[ptmp], 0x58)
2571 MMI_LDC1(%[ftmp8], %[ptmp], 0x28)
2572 "punpckhwd %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
2573 "punpcklwd %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
2574 "punpckhwd %[ftmp5], %[ftmp6], %[ftmp1] \n\t"
2575 "punpcklwd %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
2576 MMI_SDC1(%[ftmp3], %[ptmp], 0x28)
2577 MMI_SDC1(%[ftmp0], %[ptmp], 0x38)
2578 MMI_SDC1(%[ftmp6], %[ptmp], 0x68)
2579 MMI_SDC1(%[ftmp5], %[ptmp], 0x78)
/* Save values needed by the second asm statement: addr1 (base of row 8),
   addr2 (3 * stride), addr0 (2 * stride), addr3 (4 * stride). */
2580 PTR_S "%[addr1], 0x00(%[pdat]) \n\t"
2581 PTR_S "%[addr2], 0x08(%[pdat]) \n\t"
2582 PTR_S "%[addr0], 0x10(%[pdat]) \n\t"
2583 PTR_S "%[addr3], 0x18(%[pdat]) \n\t"
2584 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2585 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2586 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2587 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2588 [ftmp8]"=&f"(ftmp[8]),
2589 RESTRICT_ASM_ALL64
2590 [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
2591 [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
2592 [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
2593 [addr6]"=&r"(addr[6])
2594 : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
2595 [ptmp]"r"(ptmp), [pdat]"r"(pdat)
2596 : "memory"
2597 );
2598
/* Run the vertical intra filter on the transposed copy: the pointer is the
   ptmp slot for the column at pix + 0 and the pitch is one slot pair (0x10). */
2599 ff_deblock_v_luma_intra_8_mmi((uint8_t *) &ptmp[8], 0x10, alpha, beta);
2600
2601 __asm__ volatile (
/* Restore addr1 (base of row 8), addr2 (3 * stride), addr0 (2 * stride) and
   addr3 (4 * stride); addr4 = addr1 + 3 * stride. */
2602 PTR_L "%[addr1], 0x00(%[pdat]) \n\t"
2603 PTR_L "%[addr2], 0x08(%[pdat]) \n\t"
2604 PTR_L "%[addr0], 0x10(%[pdat]) \n\t"
2605 PTR_L "%[addr3], 0x18(%[pdat]) \n\t"
2606 PTR_ADDU "%[addr4], %[addr1], %[addr2] \n\t"
/* Rows 8..15 of columns 0..6 (upper 8 bytes of each ptmp slot); column 7 is
   loaded into ftmp8 a few lines below. */
2607 MMI_LDC1(%[ftmp0], %[ptmp], 0x08)
2608 MMI_LDC1(%[ftmp1], %[ptmp], 0x18)
2609 MMI_LDC1(%[ftmp2], %[ptmp], 0x28)
2610 MMI_LDC1(%[ftmp3], %[ptmp], 0x38)
2611 MMI_LDC1(%[ftmp4], %[ptmp], 0x48)
2612 MMI_LDC1(%[ftmp5], %[ptmp], 0x58)
2613 MMI_LDC1(%[ftmp6], %[ptmp], 0x68)
2614 "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2615 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2616 "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2617 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2618 "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2619 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2620 MMI_LDC1(%[ftmp8], %[ptmp], 0x78)
2621 "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2622 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
/* The destination rows at addr1 and addr1 + 2 * stride are used as temporary
   spill slots here; both receive their final data further down. */
2623 MMI_USDC1(%[ftmp3], %[addr1], 0x00)
2624 PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2625 "punpckhhw %[ftmp3], %[ftmp0], %[ftmp2] \n\t"
2626 "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2627 "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2628 "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2629 "punpckhhw %[ftmp6], %[ftmp7], %[ftmp1] \n\t"
2630 "punpcklhw %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
2631 MMI_USDC1(%[ftmp2], %[addr5], 0x00)
2632 MMI_ULDC1(%[ftmp2], %[addr1], 0x00)
2633 "punpckhhw %[ftmp1], %[ftmp2], %[ftmp5] \n\t"
2634 "punpcklhw %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
2635 "punpckhwd %[ftmp5], %[ftmp0], %[ftmp4] \n\t"
2636 "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2637 "punpckhwd %[ftmp4], %[ftmp7], %[ftmp2] \n\t"
2638 "punpcklwd %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
/* Write rows +0, +1, +4, +5 relative to addr1 (addr4 is row +3). */
2639 PTR_ADDU "%[addr5], %[addr1], %[stride] \n\t"
2640 MMI_USDC1(%[ftmp0], %[addr1], 0x00)
2641 PTR_ADDU "%[addr6], %[addr4], %[stride] \n\t"
2642 MMI_USDC1(%[ftmp5], %[addr5], 0x00)
2643 PTR_ADDU "%[addr5], %[addr4], %[addr0] \n\t"
2644 MMI_USDC1(%[ftmp7], %[addr6], 0x00)
2645 PTR_ADDU "%[addr6], %[addr1], %[addr0] \n\t"
2646 MMI_USDC1(%[ftmp4], %[addr5], 0x00)
/* Reload the value spilled to row +2 before that row is overwritten. */
2647 MMI_ULDC1(%[ftmp8], %[addr6], 0x00)
2648 PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2649 "punpckhwd %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
2650 "punpcklwd %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
2651 "punpckhwd %[ftmp5], %[ftmp6], %[ftmp1] \n\t"
2652 "punpcklwd %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
/* Write rows +2, +3, +6, +7. */
2653 MMI_USDC1(%[ftmp3], %[addr5], 0x00)
2654 PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2655 MMI_USDC1(%[ftmp0], %[addr4], 0x00)
2656 PTR_ADDU "%[addr6], %[addr4], %[addr3] \n\t"
2657 MMI_USDC1(%[ftmp6], %[addr5], 0x00)
/* addr5 = 8 * stride; step both row bases back to row 0. */
2658 PTR_ADDU "%[addr5], %[addr3], %[addr3] \n\t"
2659 MMI_USDC1(%[ftmp5], %[addr6], 0x00)
2660 PTR_SUBU "%[addr1], %[addr1], %[addr5] \n\t"
2661 PTR_SUBU "%[addr4], %[addr4], %[addr5] \n\t"
/* Same sequence for rows 0..7 (lower 8 bytes of each ptmp slot). */
2662 MMI_LDC1(%[ftmp0], %[ptmp], 0x00)
2663 MMI_LDC1(%[ftmp1], %[ptmp], 0x10)
2664 MMI_LDC1(%[ftmp2], %[ptmp], 0x20)
2665 MMI_LDC1(%[ftmp3], %[ptmp], 0x30)
2666 MMI_LDC1(%[ftmp4], %[ptmp], 0x40)
2667 MMI_LDC1(%[ftmp5], %[ptmp], 0x50)
2668 MMI_LDC1(%[ftmp6], %[ptmp], 0x60)
2669 "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2670 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2671 "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2672 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2673 "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2674 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2675 MMI_LDC1(%[ftmp8], %[ptmp], 0x70)
2676 "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2677 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
2678 MMI_USDC1(%[ftmp3], %[addr1], 0x00)
2679 PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2680 "punpckhhw %[ftmp3], %[ftmp0], %[ftmp2] \n\t"
2681 "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2682 "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2683 "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2684 "punpckhhw %[ftmp6], %[ftmp7], %[ftmp1] \n\t"
2685 "punpcklhw %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
2686 MMI_USDC1(%[ftmp2], %[addr5], 0x00)
2687 MMI_ULDC1(%[ftmp2], %[addr1], 0x00)
2688 "punpckhhw %[ftmp1], %[ftmp2], %[ftmp5] \n\t"
2689 "punpcklhw %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
2690 "punpckhwd %[ftmp5], %[ftmp0], %[ftmp4] \n\t"
2691 "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2692 "punpckhwd %[ftmp4], %[ftmp7], %[ftmp2] \n\t"
2693 "punpcklwd %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
2694 PTR_ADDU "%[addr5], %[addr1], %[stride] \n\t"
2695 MMI_USDC1(%[ftmp0], %[addr1], 0x00)
2696 PTR_ADDU "%[addr6], %[addr4], %[stride] \n\t"
2697 MMI_USDC1(%[ftmp5], %[addr5], 0x00)
2698 PTR_ADDU "%[addr5], %[addr4], %[addr0] \n\t"
2699 MMI_USDC1(%[ftmp7], %[addr6], 0x00)
2700 PTR_ADDU "%[addr6], %[addr1], %[addr0] \n\t"
2701 MMI_USDC1(%[ftmp4], %[addr5], 0x00)
2702 MMI_ULDC1(%[ftmp8], %[addr6], 0x00)
2703 PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2704 "punpckhwd %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
2705 "punpcklwd %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
2706 "punpckhwd %[ftmp5], %[ftmp6], %[ftmp1] \n\t"
2707 "punpcklwd %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
2708 MMI_USDC1(%[ftmp3], %[addr5], 0x00)
2709 PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2710 MMI_USDC1(%[ftmp0], %[addr4], 0x00)
2711 PTR_ADDU "%[addr6], %[addr4], %[addr3] \n\t"
2712 MMI_USDC1(%[ftmp6], %[addr5], 0x00)
2713 MMI_USDC1(%[ftmp5], %[addr6], 0x00)
2714 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2715 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2716 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2717 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2718 [ftmp8]"=&f"(ftmp[8]),
2719 RESTRICT_ASM_ALL64
2720 [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
2721 [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
2722 [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
2723 [addr6]"=&r"(addr[6])
2724 : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
2725 [ptmp]"r"(ptmp), [pdat]"r"(pdat)
2726 : "memory"
2727 );
2728 }
2729