1 /*
2 * Loongson SIMD optimized mpegvideo
3 *
4 * Copyright (c) 2015 Loongson Technology Corporation Limited
5 * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
6 * Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn>
7 *
8 * This file is part of FFmpeg.
9 *
10 * FFmpeg is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
14 *
15 * FFmpeg is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with FFmpeg; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25 #include "mpegvideo_mips.h"
26 #include "libavutil/mips/mmiutils.h"
27
/**
 * Dequantize one H.263 intra block in place using Loongson MMI.
 *
 * The DC coefficient is computed in C and written back after the SIMD
 * loop: when s->h263_aic is not set it is block[0] scaled by
 * s->y_dc_scale (n < 4) or s->c_dc_scale (otherwise); when it is set,
 * block[0] is kept as is and the rounding offset qadd is 0.
 *
 * The SIMD loop walks the block in groups of 8 coefficients (16 bytes),
 * starting at block[0] and continuing until the byte offset relative to
 * block + nCoeffs becomes positive, so it may process up to 7
 * coefficients beyond index nCoeffs.
 *
 * @param s      codec context; reads h263_aic, ac_pred, y_dc_scale,
 *               c_dc_scale, block_last_index[] and
 *               inter_scantable.raster_end[]
 * @param block  coefficients, modified in place
 * @param n      block index; n < 4 selects y_dc_scale
 * @param qscale quantizer scale; qmul = 2 * qscale,
 *               qadd = (qscale - 1) | 1 unless h263_aic is set
 */
void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, int16_t *block,
        int n, int qscale)
{
    int64_t level, nCoeffs;
    double ftmp[6];
    mips_reg addr[1];
    mips_reg offset;
    union mmi_intfloat64 qmul_u, qadd_u;
    DECLARE_VAR_ALL64;

    qmul_u.i = qscale << 1;
    av_assert2(s->block_last_index[n]>=0 || s->h263_aic);

    if (!s->h263_aic) {
        if (n<4)
            level = block[0] * s->y_dc_scale;
        else
            level = block[0] * s->c_dc_scale;
        qadd_u.i = (qscale-1) | 1;
    } else {
        qadd_u.i = 0;
        level = block[0];
    }

    if(s->ac_pred)
        nCoeffs = 63;
    else
        nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]];

    /* Negative byte offset of block[0] relative to block + nCoeffs; the
     * loop advances it by 16 per iteration. */
    offset = (mips_reg)(2*(-nCoeffs));

    /*
     * %[qmul], %[qadd] and %[nCoeffs] are written by the asm below
     * (packsswh / PTR_ADDIU), so they are declared as read-write operands.
     * GCC assumes input-only operands are unchanged on exit from the asm.
     */
    __asm__ volatile (
        /* ftmp0 = -qadd in every 16-bit lane, ftmp5 = 0 */
        "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
        "packsswh %[qmul], %[qmul], %[qmul] \n\t"
        "packsswh %[qmul], %[qmul], %[qmul] \n\t"
        "packsswh %[qadd], %[qadd], %[qadd] \n\t"
        "packsswh %[qadd], %[qadd], %[qadd] \n\t"
        "psubh %[ftmp0], %[ftmp0], %[qadd] \n\t"
        "pxor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
        ".p2align 4 \n\t"

        "1: \n\t"
        PTR_ADDU "%[addr0], %[block], %[nCoeffs] \n\t"
        MMI_LDC1(%[ftmp1], %[addr0], 0x00)
        MMI_LDC1(%[ftmp2], %[addr0], 0x08)
        "mov.d %[ftmp3], %[ftmp1] \n\t"
        "mov.d %[ftmp4], %[ftmp2] \n\t"
        /* ftmp1/2 = x * qmul; ftmp3/4 = per-lane mask (x > 0) */
        "pmullh %[ftmp1], %[ftmp1], %[qmul] \n\t"
        "pmullh %[ftmp2], %[ftmp2], %[qmul] \n\t"
        "pcmpgth %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
        "pcmpgth %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
        /* t = ((x * qmul) ^ mask) - qadd; result = t ^ mask, i.e.
         * x * qmul + qadd for x > 0 and x * qmul - qadd for x < 0 */
        "pxor %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
        "pxor %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
        "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
        "paddh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
        "pxor %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
        "pxor %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
        /* lanes where t == -qadd (which is the case for x == 0) are
         * cleared so that zero coefficients stay zero */
        "pcmpeqh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
        "pcmpeqh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
        "pandn %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
        "pandn %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
        PTR_ADDIU "%[nCoeffs], %[nCoeffs], 0x10 \n\t"
        MMI_SDC1(%[ftmp1], %[addr0], 0x00)
        MMI_SDC1(%[ftmp2], %[addr0], 0x08)
        "blez %[nCoeffs], 1b \n\t"
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
          RESTRICT_ASM_ALL64
          [addr0]"=&r"(addr[0]),
          [nCoeffs]"+&r"(offset),
          [qmul]"+&f"(qmul_u.f), [qadd]"+&f"(qadd_u.f)
        : [block]"r"((mips_reg)(block+nCoeffs))
        : "memory"
    );

    /* The loop also rewrote block[0]; restore the separately scaled DC. */
    block[0] = level;
}
103
/**
 * Dequantize one H.263 inter block in place using Loongson MMI.
 *
 * Every nonzero coefficient x becomes x * qmul + qadd (x > 0) or
 * x * qmul - qadd (x < 0), with qmul = 2 * qscale and
 * qadd = (qscale - 1) | 1; zero coefficients stay zero. Unlike the intra
 * variant there is no separate DC handling.
 *
 * The SIMD loop walks the block in groups of 8 coefficients (16 bytes),
 * starting at block[0] and continuing until the byte offset relative to
 * block + nCoeffs becomes positive, so it may process up to 7
 * coefficients beyond index nCoeffs.
 *
 * @param s      codec context; reads block_last_index[], h263_aic (assert
 *               only) and inter_scantable.raster_end[]
 * @param block  coefficients, modified in place
 * @param n      block index into s->block_last_index[]
 * @param qscale quantizer scale
 */
void ff_dct_unquantize_h263_inter_mmi(MpegEncContext *s, int16_t *block,
        int n, int qscale)
{
    int64_t nCoeffs;
    double ftmp[6];
    mips_reg addr[1];
    mips_reg offset;
    union mmi_intfloat64 qmul_u, qadd_u;
    DECLARE_VAR_ALL64;

    qmul_u.i = qscale << 1;
    qadd_u.i = (qscale - 1) | 1;
    av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
    nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]];

    /* Negative byte offset of block[0] relative to block + nCoeffs; the
     * loop advances it by 16 per iteration. */
    offset = (mips_reg)(2*(-nCoeffs));

    /*
     * %[qmul], %[qadd] and %[nCoeffs] are written by the asm below
     * (packsswh / PTR_ADDIU), so they are declared as read-write operands.
     * GCC assumes input-only operands are unchanged on exit from the asm.
     */
    __asm__ volatile (
        /* ftmp0 = -qadd in every 16-bit lane, ftmp5 = 0 */
        "packsswh %[qmul], %[qmul], %[qmul] \n\t"
        "packsswh %[qmul], %[qmul], %[qmul] \n\t"
        "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
        "packsswh %[qadd], %[qadd], %[qadd] \n\t"
        "packsswh %[qadd], %[qadd], %[qadd] \n\t"
        "psubh %[ftmp0], %[ftmp0], %[qadd] \n\t"
        "pxor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
        ".p2align 4 \n\t"
        "1: \n\t"
        PTR_ADDU "%[addr0], %[block], %[nCoeffs] \n\t"
        MMI_LDC1(%[ftmp1], %[addr0], 0x00)
        MMI_LDC1(%[ftmp2], %[addr0], 0x08)
        "mov.d %[ftmp3], %[ftmp1] \n\t"
        "mov.d %[ftmp4], %[ftmp2] \n\t"
        /* ftmp1/2 = x * qmul; ftmp3/4 = per-lane mask (x > 0) */
        "pmullh %[ftmp1], %[ftmp1], %[qmul] \n\t"
        "pmullh %[ftmp2], %[ftmp2], %[qmul] \n\t"
        "pcmpgth %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
        "pcmpgth %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
        /* t = ((x * qmul) ^ mask) - qadd; result = t ^ mask */
        "pxor %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
        "pxor %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
        "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
        "paddh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
        "pxor %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
        "pxor %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
        /* lanes where t == -qadd (which is the case for x == 0) are
         * cleared so that zero coefficients stay zero */
        "pcmpeqh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
        "pcmpeqh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
        "pandn %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
        "pandn %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
        PTR_ADDIU "%[nCoeffs], %[nCoeffs], 0x10 \n\t"
        MMI_SDC1(%[ftmp1], %[addr0], 0x00)
        MMI_SDC1(%[ftmp2], %[addr0], 0x08)
        "blez %[nCoeffs], 1b \n\t"
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
          RESTRICT_ASM_ALL64
          [addr0]"=&r"(addr[0]),
          [nCoeffs]"+&r"(offset),
          [qmul]"+&f"(qmul_u.f), [qadd]"+&f"(qadd_u.f)
        : [block]"r"((mips_reg)(block+nCoeffs))
        : "memory"
    );
}
162
/**
 * Dequantize one MPEG-1 intra block in place using Loongson MMI.
 *
 * For each nonzero coefficient x the loop computes
 *   sign(x) * ((((|x| * (quant * qscale)) >> 3) - 1) | 1)
 * in 16-bit lanes, with quant taken from s->intra_matrix; zero
 * coefficients stay zero. The DC coefficient is scaled separately in C
 * (s->y_dc_scale for n < 4, s->c_dc_scale otherwise) and written back to
 * block[0] after the loop.
 *
 * The loop handles 8 coefficients (16 bytes) per iteration and runs while
 * the byte offset relative to block + nCoeffs is negative.
 *
 * @param s      codec context; reads block_last_index[],
 *               intra_scantable.raster_end[], y_dc_scale, c_dc_scale and
 *               intra_matrix
 * @param block  coefficients, modified in place
 * @param n      block index; n < 4 selects y_dc_scale
 * @param qscale quantizer scale
 */
void ff_dct_unquantize_mpeg1_intra_mmi(MpegEncContext *s, int16_t *block,
        int n, int qscale)
{
    int64_t nCoeffs;
    const uint16_t *quant_matrix;
    int block0;
    double ftmp[10];
    uint64_t tmp[1];
    mips_reg addr[1];
    DECLARE_VAR_ALL64;
    DECLARE_VAR_ADDRT;

    av_assert2(s->block_last_index[n]>=0);
    /* One past the last coefficient in raster order. */
    nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1;

    /* DC is scaled here and stored after the SIMD loop. */
    if (n<4)
        block0 = block[0] * s->y_dc_scale;
    else
        block0 = block[0] * s->c_dc_scale;

    /* XXX: only mpeg1 */
    quant_matrix = s->intra_matrix;

    __asm__ volatile (
        /* ftmp0 = all-ones lanes shifted right by 15, i.e. 1 per lane;
         * ftmp1 = qscale replicated by the two packsswh;
         * addr0 = copy of the negative byte offset in %[nCoeffs] */
        "dli %[tmp0], 0x0f \n\t"
        "pcmpeqh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
        "dmtc1 %[tmp0], %[ftmp4] \n\t"
        "dmtc1 %[qscale], %[ftmp1] \n\t"
        "psrlh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
        "packsswh %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
        "packsswh %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
        "or %[addr0], %[nCoeffs], $0 \n\t"
        ".p2align 4 \n\t"

        "1: \n\t"
        /* ftmp2/3 = 8 coefficients, ftmp4/5 = untouched copies */
        MMI_LDXC1(%[ftmp2], %[addr0], %[block], 0x00)
        MMI_LDXC1(%[ftmp3], %[addr0], %[block], 0x08)
        "mov.d %[ftmp4], %[ftmp2] \n\t"
        "mov.d %[ftmp5], %[ftmp3] \n\t"
        /* ftmp6/7 = quant_matrix entries * qscale */
        MMI_LDXC1(%[ftmp6], %[addr0], %[quant], 0x00)
        MMI_LDXC1(%[ftmp7], %[addr0], %[quant], 0x08)
        "pmullh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
        "pmullh %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
        /* ftmp8/9 = sign mask (0 > x); ftmp2/3 = |x| via (x ^ s) - s */
        "pxor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
        "pxor %[ftmp9], %[ftmp9], %[ftmp9] \n\t"
        "pcmpgth %[ftmp8], %[ftmp8], %[ftmp2] \n\t"
        "pcmpgth %[ftmp9], %[ftmp9], %[ftmp3] \n\t"
        "pxor %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
        "pxor %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
        "psubh %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
        "psubh %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
        "pmullh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
        "pmullh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
        /* ftmp6/7 = mask of lanes whose original coefficient was 0 */
        "pxor %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
        "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
        "pcmpeqh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
        "dli %[tmp0], 0x03 \n\t"
        "pcmpeqh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
        "dmtc1 %[tmp0], %[ftmp4] \n\t"
        /* >> 3 (arithmetic), then (v - 1) | 1 */
        "psrah %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
        "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
        "psubh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
        "psubh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
        "por %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
        "por %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
        /* reapply the sign, then clear lanes that were originally 0 */
        "pxor %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
        "pxor %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
        "psubh %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
        "psubh %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
        "pandn %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
        "pandn %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
        MMI_SDXC1(%[ftmp6], %[addr0], %[block], 0x00)
        MMI_SDXC1(%[ftmp7], %[addr0], %[block], 0x08)
        /* next 16 bytes; stop once the offset is no longer negative */
        PTR_ADDIU "%[addr0], %[addr0], 0x10 \n\t"
        "bltz %[addr0], 1b \n\t"
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
          [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
          [tmp0]"=&r"(tmp[0]),
          RESTRICT_ASM_ALL64
          RESTRICT_ASM_ADDRT
          [addr0]"=&r"(addr[0])
        : [block]"r"((mips_reg)(block+nCoeffs)),
          [quant]"r"((mips_reg)(quant_matrix+nCoeffs)),
          [nCoeffs]"r"((mips_reg)(2*(-nCoeffs))),
          [qscale]"r"(qscale)
        : "memory"
    );

    /* The loop also rewrote block[0]; restore the separately scaled DC. */
    block[0] = block0;
}
256
/**
 * Dequantize one MPEG-1 inter block in place using Loongson MMI.
 *
 * For each nonzero coefficient x the loop computes
 *   sign(x) * (((((2 * |x| + 1) * (quant * qscale)) >> 4) - 1) | 1)
 * in 16-bit lanes, with quant taken from s->inter_matrix; zero
 * coefficients stay zero. There is no separate DC handling.
 *
 * The loop handles 8 coefficients (16 bytes) per iteration and runs while
 * the byte offset relative to block + nCoeffs is negative.
 *
 * @param s      codec context; reads block_last_index[],
 *               intra_scantable.raster_end[] and inter_matrix
 * @param block  coefficients, modified in place
 * @param n      block index into s->block_last_index[]
 * @param qscale quantizer scale
 */
void ff_dct_unquantize_mpeg1_inter_mmi(MpegEncContext *s, int16_t *block,
        int n, int qscale)
{
    int64_t nCoeffs;
    const uint16_t *quant_matrix;
    double ftmp[10];
    uint64_t tmp[1];
    mips_reg addr[1];
    DECLARE_VAR_ALL64;
    DECLARE_VAR_ADDRT;

    av_assert2(s->block_last_index[n] >= 0);
    /* One past the last coefficient in raster order. */
    nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1;
    quant_matrix = s->inter_matrix;

    __asm__ volatile (
        /* ftmp0 = all-ones lanes shifted right by 15, i.e. 1 per lane;
         * ftmp1 = qscale replicated by the two packsswh;
         * addr0 = copy of the negative byte offset in %[nCoeffs] */
        "dli %[tmp0], 0x0f \n\t"
        "pcmpeqh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
        "dmtc1 %[tmp0], %[ftmp4] \n\t"
        "dmtc1 %[qscale], %[ftmp1] \n\t"
        "psrlh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
        "packsswh %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
        "packsswh %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
        "or %[addr0], %[nCoeffs], $0 \n\t"
        ".p2align 4 \n\t"

        "1: \n\t"
        /* ftmp2/3 = 8 coefficients, ftmp4/5 = untouched copies */
        MMI_LDXC1(%[ftmp2], %[addr0], %[block], 0x00)
        MMI_LDXC1(%[ftmp3], %[addr0], %[block], 0x08)
        "mov.d %[ftmp4], %[ftmp2] \n\t"
        "mov.d %[ftmp5], %[ftmp3] \n\t"
        /* ftmp6/7 = quant_matrix entries * qscale */
        MMI_LDXC1(%[ftmp6], %[addr0], %[quant], 0x00)
        MMI_LDXC1(%[ftmp7], %[addr0], %[quant], 0x08)
        "pmullh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
        "pmullh %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
        /* ftmp8/9 = sign mask (0 > x); ftmp2/3 = |x| via (x ^ s) - s */
        "pxor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
        "pxor %[ftmp9], %[ftmp9], %[ftmp9] \n\t"
        "pcmpgth %[ftmp8], %[ftmp8], %[ftmp2] \n\t"
        "pcmpgth %[ftmp9], %[ftmp9], %[ftmp3] \n\t"
        "pxor %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
        "pxor %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
        "psubh %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
        "psubh %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
        /* 2 * |x| + 1 */
        "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
        "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
        "paddh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
        "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
        "pmullh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
        "pmullh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
        /* ftmp6/7 = mask of lanes whose original coefficient was 0 */
        "pxor %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
        "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
        "pcmpeqh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
        "dli %[tmp0], 0x04 \n\t"
        "pcmpeqh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
        "dmtc1 %[tmp0], %[ftmp4] \n\t"
        /* >> 4 (arithmetic), then (v - 1) | 1 */
        "psrah %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
        "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
        "psubh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
        "psubh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
        "por %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
        "por %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
        /* reapply the sign, then clear lanes that were originally 0 */
        "pxor %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
        "pxor %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
        "psubh %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
        "psubh %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
        "pandn %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
        "pandn %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
        MMI_SDXC1(%[ftmp6], %[addr0], %[block], 0x00)
        MMI_SDXC1(%[ftmp7], %[addr0], %[block], 0x08)
        /* next 16 bytes; stop once the offset is no longer negative */
        PTR_ADDIU "%[addr0], %[addr0], 0x10 \n\t"
        "bltz %[addr0], 1b \n\t"
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
          [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
          [tmp0]"=&r"(tmp[0]),
          RESTRICT_ASM_ALL64
          RESTRICT_ASM_ADDRT
          [addr0]"=&r"(addr[0])
        : [block]"r"((mips_reg)(block+nCoeffs)),
          [quant]"r"((mips_reg)(quant_matrix+nCoeffs)),
          [nCoeffs]"r"((mips_reg)(2*(-nCoeffs))),
          [qscale]"r"(qscale)
        : "memory"
    );
}
344
/**
 * Dequantize one MPEG-2 intra block in place using Loongson MMI.
 *
 * For each nonzero coefficient x the loop computes
 *   sign(x) * ((|x| * (quant * qscale)) >> 3)
 * in 16-bit lanes, with quant taken from s->intra_matrix; zero
 * coefficients stay zero. The DC coefficient is scaled separately in C
 * (s->y_dc_scale for n < 4, s->c_dc_scale otherwise) and written back to
 * block[0] after the loop.
 *
 * nCoeffs is 63 when s->alternate_scan is set, otherwise the raster index
 * of the last coefficient. The loop handles 8 coefficients (16 bytes) per
 * iteration and runs while the byte offset relative to block + nCoeffs is
 * not positive.
 *
 * @param s      codec context; reads block_last_index[], alternate_scan,
 *               intra_scantable.raster_end[], y_dc_scale, c_dc_scale and
 *               intra_matrix
 * @param block  coefficients, modified in place
 * @param n      block index; n < 4 selects y_dc_scale
 * @param qscale quantizer scale
 */
void ff_dct_unquantize_mpeg2_intra_mmi(MpegEncContext *s, int16_t *block,
        int n, int qscale)
{
    /* Signed, like in the other dequantizers of this file, because the
     * value is negated to form the starting byte offset. */
    int64_t nCoeffs;
    const uint16_t *quant_matrix;
    int block0;
    double ftmp[10];
    uint64_t tmp[1];
    mips_reg addr[1];
    DECLARE_VAR_ALL64;
    DECLARE_VAR_ADDRT;

    /* av_assert2 for consistency with the other routines in this file. */
    av_assert2(s->block_last_index[n]>=0);

    if (s->alternate_scan)
        nCoeffs = 63;
    else
        nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]];

    /* DC is scaled here and stored after the SIMD loop. */
    if (n < 4)
        block0 = block[0] * s->y_dc_scale;
    else
        block0 = block[0] * s->c_dc_scale;

    quant_matrix = s->intra_matrix;

    /*
     * NOTE(review): this routine uses mtc1 where the MPEG-1 routines use
     * dmtc1 for the same moves. Whether the upper 32 bits of the FP
     * register are well defined after mtc1 on the targeted cores should
     * be confirmed. ftmp0 is set up here but not read inside the loop.
     */
    __asm__ volatile (
        "dli %[tmp0], 0x0f \n\t"
        "pcmpeqh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
        "mtc1 %[tmp0], %[ftmp3] \n\t"
        "mtc1 %[qscale], %[ftmp9] \n\t"
        "psrlh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
        /* ftmp9 = qscale replicated by the two packsswh */
        "packsswh %[ftmp9], %[ftmp9], %[ftmp9] \n\t"
        "packsswh %[ftmp9], %[ftmp9], %[ftmp9] \n\t"
        /* addr0 = copy of the negative byte offset in %[nCoeffs] */
        "or %[addr0], %[nCoeffs], $0 \n\t"
        ".p2align 4 \n\t"

        "1: \n\t"
        /* ftmp1/2 = 8 coefficients, ftmp3/4 = untouched copies */
        MMI_LDXC1(%[ftmp1], %[addr0], %[block], 0x00)
        MMI_LDXC1(%[ftmp2], %[addr0], %[block], 0x08)
        "mov.d %[ftmp3], %[ftmp1] \n\t"
        "mov.d %[ftmp4], %[ftmp2] \n\t"
        /* ftmp5/6 = quant_matrix entries * qscale */
        MMI_LDXC1(%[ftmp5], %[addr0], %[quant], 0x00)
        MMI_LDXC1(%[ftmp6], %[addr0], %[quant], 0x08)
        "pmullh %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
        "pmullh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
        /* ftmp7/8 = sign mask (0 > x); ftmp1/2 = |x| via (x ^ s) - s */
        "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
        "pxor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
        "pcmpgth %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
        "pcmpgth %[ftmp8], %[ftmp8], %[ftmp2] \n\t"
        "pxor %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
        "pxor %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
        "psubh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
        "psubh %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
        "pmullh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
        "pmullh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
        /* ftmp5/6 = mask of lanes whose original coefficient was 0 */
        "pxor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
        "pxor %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
        "pcmpeqh %[ftmp5], %[ftmp5], %[ftmp3] \n\t"
        "dli %[tmp0], 0x03 \n\t"
        "pcmpeqh %[ftmp6] , %[ftmp6], %[ftmp4] \n\t"
        "mtc1 %[tmp0], %[ftmp3] \n\t"
        /* >> 3 (arithmetic) */
        "psrah %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
        "psrah %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
        /* reapply the sign, then clear lanes that were originally 0 */
        "pxor %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
        "pxor %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
        "psubh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
        "psubh %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
        "pandn %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
        "pandn %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
        MMI_SDXC1(%[ftmp5], %[addr0], %[block], 0x00)
        MMI_SDXC1(%[ftmp6], %[addr0], %[block], 0x08)
        /* next 16 bytes; continue while the offset is <= 0 */
        PTR_ADDIU "%[addr0], %[addr0], 0x10 \n\t"
        "blez %[addr0], 1b \n\t"
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
          [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
          [tmp0]"=&r"(tmp[0]),
          RESTRICT_ASM_ALL64
          RESTRICT_ASM_ADDRT
          [addr0]"=&r"(addr[0])
        : [block]"r"((mips_reg)(block+nCoeffs)),
          [quant]"r"((mips_reg)(quant_matrix+nCoeffs)),
          [nCoeffs]"r"((mips_reg)(2*(-nCoeffs))),
          [qscale]"r"(qscale)
        : "memory"
    );

    /* The loop also rewrote block[0]; restore the separately scaled DC. */
    block[0]= block0;
}
437
/**
 * Apply DCT-domain denoising to a 64-coefficient block using Loongson MMI.
 *
 * Increments s->dct_count[intra] (intra = s->mb_intra), then for each
 * coefficient: adds its absolute value to the 32-bit accumulator in
 * s->dct_error_sum[intra], and replaces the coefficient by its magnitude
 * reduced by the matching s->dct_offset[intra] entry (unsigned saturating
 * subtract) with the original sign reapplied.
 *
 * @param s      codec context; reads mb_intra, updates dct_count[] and
 *               dct_error_sum[], reads dct_offset[]
 * @param block  64 coefficients, modified in place
 */
void ff_denoise_dct_mmi(MpegEncContext *s, int16_t *block)
{
    const int intra = s->mb_intra;
    int *sum = s->dct_error_sum[intra];
    uint16_t *offset = s->dct_offset[intra];
    double ftmp[8];
    mips_reg addr[1];
    DECLARE_VAR_ALL64;

    s->dct_count[intra]++;

    __asm__ volatile(
        /* ftmp0 = 0, used as the zero half when widening to 32 bits */
        "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
        "1: \n\t"
        /* ftmp1/3 = 8 coefficients; ftmp2/4 = sign mask (0 > x) */
        MMI_LDC1(%[ftmp1], %[block], 0x00)
        "pxor %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
        MMI_LDC1(%[ftmp3], %[block], 0x08)
        "pxor %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
        "pcmpgth %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
        "pcmpgth %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
        /* |x| via (x ^ s) - s */
        "pxor %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
        "pxor %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
        "psubh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
        "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
        /* keep |x| in ftmp5/7 for the error sum, then subtract the offset
         * with unsigned saturation */
        MMI_LDC1(%[ftmp6], %[offset], 0x00)
        "mov.d %[ftmp5], %[ftmp1] \n\t"
        "psubush %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
        MMI_LDC1(%[ftmp6], %[offset], 0x08)
        "mov.d %[ftmp7], %[ftmp3] \n\t"
        "psubush %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
        /* reapply the sign and store the denoised coefficients */
        "pxor %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
        "pxor %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
        "psubh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
        "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
        MMI_SDC1(%[ftmp1], %[block], 0x00)
        MMI_SDC1(%[ftmp3], %[block], 0x08)
        /* widen the saved |x| values to 32 bits by interleaving with zero */
        "mov.d %[ftmp1], %[ftmp5] \n\t"
        "mov.d %[ftmp3], %[ftmp7] \n\t"
        "punpcklhw %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
        "punpckhhw %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
        "punpcklhw %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
        "punpckhhw %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
        /* sum[0..7] += |x| */
        MMI_LDC1(%[ftmp2], %[sum], 0x00)
        "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
        MMI_LDC1(%[ftmp2], %[sum], 0x08)
        "paddw %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
        MMI_LDC1(%[ftmp2], %[sum], 0x10)
        "paddw %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
        MMI_LDC1(%[ftmp2], %[sum], 0x18)
        "paddw %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
        MMI_SDC1(%[ftmp5], %[sum], 0x00)
        MMI_SDC1(%[ftmp1], %[sum], 0x08)
        MMI_SDC1(%[ftmp7], %[sum], 0x10)
        MMI_SDC1(%[ftmp3], %[sum], 0x18)
        /* advance by 8 coefficients: 16 bytes of block and offset,
         * 32 bytes of sum; loop until block reaches block1 */
        PTR_ADDIU "%[block], %[block], 0x10 \n\t"
        PTR_ADDIU "%[sum], %[sum], 0x20 \n\t"
        PTR_SUBU "%[addr0], %[block1], %[block] \n\t"
        PTR_ADDIU "%[offset], %[offset], 0x10 \n\t"
        "bgtz %[addr0], 1b \n\t"
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
          RESTRICT_ASM_ALL64
          [addr0]"=&r"(addr[0]),
          [block]"+&r"(block), [sum]"+&r"(sum),
          [offset]"+&r"(offset)
        : [block1]"r"(block+64)
        : "memory"
    );
}
509